浏览代码

add CHROME_HEADLESS option and improve detection of the default Chrome user data dir

Nick Sweeting 6 年之前
父节点
当前提交
1c1bc76ac1
共有 2 个文件被更改，包括 39 次插入和 9 次删除
  1. 38 9
      archivebox/archive_methods.py
  2. 1 0
      archivebox/config.py

+ 38 - 9
archivebox/archive_methods.py

@@ -31,6 +31,7 @@ from config import (
     COOKIES_FILE,
     WGET_USER_AGENT,
     CHROME_USER_DATA_DIR,
+    CHROME_HEADLESS,
     CHROME_SANDBOX,
     TIMEOUT,
     MEDIA_TIMEOUT,
@@ -613,14 +614,42 @@ def fetch_git(link_dir, link, timeout=TIMEOUT):
         'output': output,
     }
 
-def chrome_headless(binary=CHROME_BINARY, user_data_dir=CHROME_USER_DATA_DIR):
-    args = [binary, '--headless']
-    if not CHROME_SANDBOX:
+def chrome_headless(binary=CHROME_BINARY, user_data_dir=CHROME_USER_DATA_DIR, headless=CHROME_HEADLESS, sandbox=CHROME_SANDBOX):
+    global USER_DATA_DIR
+    user_data_dir = user_data_dir or USER_DATA_DIR
+    cmd_args = [binary]
+
+    if headless:
+        cmd_args += ('--headless',)
+    
+    if not sandbox:
         # don't use GPU or sandbox when running inside a Docker container
-        args += ['--no-sandbox', '--disable-gpu']
-    default_profile = os.path.expanduser('~/Library/Application Support/Google/Chrome')
+        cmd_args += ('--no-sandbox', '--disable-gpu')
+
+    
+    # Find chrome user data directory
+    default_profile_paths = (
+        '~/.config/chromium',
+        '~/.config/google-chrome',
+        '~/.config/google-chrome-beta',
+        '~/.config/google-chrome-unstable',
+        '~/Library/Application Support/Chromium',
+        '~/Library/Application Support/Google/Chrome',
+        '~/Library/Application Support/Google/Chrome Canary',
+        '~/AppData/Local/Chromium/User Data',
+        '~/AppData/Local/Google/Chrome/User Data',
+        '~/AppData/Local/Google/Chrome SxS/User Data',
+    )
     if user_data_dir:
-        args.append('--user-data-dir={}'.format(user_data_dir))
-    elif os.path.exists(default_profile):
-        args.append('--user-data-dir={}'.format(default_profile))
-    return args
+        cmd_args.append('--user-data-dir={}'.format(user_data_dir))
+    else:
+        for path in default_profile_paths:
+            full_path = os.path.expanduser(path)
+            if os.path.exists(full_path):
+                USER_DATA_DIR = full_path
+                cmd_args.append('--user-data-dir={}'.format(full_path))
+                break
+    return cmd_args
+
+
+USER_DATA_DIR = CHROME_USER_DATA_DIR

+ 1 - 0
archivebox/config.py

@@ -37,6 +37,7 @@ GIT_DOMAINS =            os.getenv('GIT_DOMAINS',            'github.com,bitbuck
 WGET_USER_AGENT =        os.getenv('WGET_USER_AGENT',        'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
 COOKIES_FILE =           os.getenv('COOKIES_FILE',           None)
 CHROME_USER_DATA_DIR =   os.getenv('CHROME_USER_DATA_DIR',   None)
+CHROME_HEADLESS =        os.getenv('CHROME_HEADLESS',        'True'             ).lower() == 'true'
 
 CURL_BINARY =            os.getenv('CURL_BINARY',            'curl')
 GIT_BINARY =             os.getenv('GIT_BINARY',             'git')