|
@@ -500,7 +500,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|
|
'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
|
|
'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
|
|
|
'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
|
|
'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
|
|
|
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
|
|
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
|
|
|
- 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
|
|
|
|
|
|
|
+ 'CHROME_USER_DATA_DIR': {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None},
|
|
|
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
|
|
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
|
|
|
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
|
|
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
|
|
|
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories
|
|
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories
|
|
@@ -910,27 +910,36 @@ def find_chrome_binary() -> Optional[str]:
|
|
|
|
|
|
|
|
def find_chrome_data_dir() -> Optional[str]:
|
|
def find_chrome_data_dir() -> Optional[str]:
|
|
|
"""find any installed chrome user data directories in the default locations"""
|
|
"""find any installed chrome user data directories in the default locations"""
|
|
|
- # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
|
|
|
|
- # make sure data dir finding precedence order always matches binary finding order
|
|
|
|
|
- default_profile_paths = (
|
|
|
|
|
- '~/.config/chromium',
|
|
|
|
|
- '~/Library/Application Support/Chromium',
|
|
|
|
|
- '~/AppData/Local/Chromium/User Data',
|
|
|
|
|
- '~/.config/chrome',
|
|
|
|
|
- '~/.config/google-chrome',
|
|
|
|
|
- '~/Library/Application Support/Google/Chrome',
|
|
|
|
|
- '~/AppData/Local/Google/Chrome/User Data',
|
|
|
|
|
- '~/.config/google-chrome-stable',
|
|
|
|
|
- '~/.config/google-chrome-beta',
|
|
|
|
|
- '~/Library/Application Support/Google/Chrome Canary',
|
|
|
|
|
- '~/AppData/Local/Google/Chrome SxS/User Data',
|
|
|
|
|
- '~/.config/google-chrome-unstable',
|
|
|
|
|
- '~/.config/google-chrome-dev',
|
|
|
|
|
- )
|
|
|
|
|
- for path in default_profile_paths:
|
|
|
|
|
- full_path = Path(path).resolve()
|
|
|
|
|
- if full_path.exists():
|
|
|
|
|
- return full_path
|
|
|
|
|
|
|
+ # Deprecated because this is DANGEROUS: do not re-implement or uncomment this behavior.
|
|
|
|
|
+
|
|
|
|
|
+ # Going forward we want to discourage people from using their main chrome profile for archiving.
|
|
|
|
|
+ # Session tokens, personal data, and cookies are often returned in server responses;
|
|
|
|
|
+ # once they get archived, they are essentially burned, because anyone who can view the archive
|
|
|
|
|
+ # can use that data to masquerade as the logged-in user that did the archiving.
|
|
|
|
|
+ # For this reason users should always create dedicated burner profiles for archiving and not use
|
|
|
|
|
+ # their daily driver main accounts.
|
|
|
|
|
+
|
|
|
|
|
+ # # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
|
|
|
|
+ # # make sure data dir finding precedence order always matches binary finding order
|
|
|
|
|
+ # default_profile_paths = (
|
|
|
|
|
+ # '~/.config/chromium',
|
|
|
|
|
+ # '~/Library/Application Support/Chromium',
|
|
|
|
|
+ # '~/AppData/Local/Chromium/User Data',
|
|
|
|
|
+ # '~/.config/chrome',
|
|
|
|
|
+ # '~/.config/google-chrome',
|
|
|
|
|
+ # '~/Library/Application Support/Google/Chrome',
|
|
|
|
|
+ # '~/AppData/Local/Google/Chrome/User Data',
|
|
|
|
|
+ # '~/.config/google-chrome-stable',
|
|
|
|
|
+ # '~/.config/google-chrome-beta',
|
|
|
|
|
+ # '~/Library/Application Support/Google/Chrome Canary',
|
|
|
|
|
+ # '~/AppData/Local/Google/Chrome SxS/User Data',
|
|
|
|
|
+ # '~/.config/google-chrome-unstable',
|
|
|
|
|
+ # '~/.config/google-chrome-dev',
|
|
|
|
|
+ # )
|
|
|
|
|
+ # for path in default_profile_paths:
|
|
|
|
|
+ # full_path = Path(path).resolve()
|
|
|
|
|
+ # if full_path.exists():
|
|
|
|
|
+ # return full_path
|
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
def wget_supports_compression(config):
|
|
def wget_supports_compression(config):
|