| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508 |
- #!/usr/bin/env node
- /**
- * Chrome Extension Management Utilities
- *
- * Handles downloading, installing, and managing Chrome extensions for browser automation.
- * Ported from the TypeScript implementation in archivebox.ts
- */
- const fs = require('fs');
- const path = require('path');
- const crypto = require('crypto');
- const http = require('http');
- const net = require('net');
- const { exec, spawn } = require('child_process');
- const { promisify } = require('util');
- const { Readable } = require('stream');
- const { finished } = require('stream/promises');
- const execAsync = promisify(exec);
- // ============================================================================
- // Environment helpers
- // ============================================================================
- /**
- * Get environment variable with default value.
- * @param {string} name - Environment variable name
- * @param {string} [defaultValue=''] - Default value if not set
- * @returns {string} - Trimmed environment variable value
- */
- function getEnv(name, defaultValue = '') {
- return (process.env[name] || defaultValue).trim();
- }
- /**
- * Get boolean environment variable.
- * @param {string} name - Environment variable name
- * @param {boolean} [defaultValue=false] - Default value if not set
- * @returns {boolean} - Boolean value
- */
- function getEnvBool(name, defaultValue = false) {
- const val = getEnv(name, '').toLowerCase();
- if (['true', '1', 'yes', 'on'].includes(val)) return true;
- if (['false', '0', 'no', 'off'].includes(val)) return false;
- return defaultValue;
- }
- /**
- * Get integer environment variable.
- * @param {string} name - Environment variable name
- * @param {number} [defaultValue=0] - Default value if not set
- * @returns {number} - Integer value
- */
- function getEnvInt(name, defaultValue = 0) {
- const val = parseInt(getEnv(name, String(defaultValue)), 10);
- return isNaN(val) ? defaultValue : val;
- }
- /**
- * Get array environment variable (JSON array or comma-separated string).
- *
- * Parsing priority:
- * 1. JSON array format (recommended): '["--arg1=value", "--arg2=value,with,commas"]'
- * 2. Comma-before-flag format: '--arg1=value,--arg2=value' (splits on ,-- pattern)
- * 3. Single Chrome flag: '--flag=value,with,commas' (no split, returns as single item)
- * 4. Simple comma-separated: 'a,b,c' (splits on commas)
- *
- * For Chrome arguments that may contain internal commas, use JSON format:
- * CHROME_ARGS='["--user-data-dir=/path/with,comma", "--window-size=1440,900"]'
- *
- * @param {string} name - Environment variable name
- * @param {string[]} [defaultValue=[]] - Default value if not set
- * @returns {string[]} - Array of strings
- */
- function getEnvArray(name, defaultValue = []) {
- const val = getEnv(name, '');
- if (!val) return defaultValue;
- // Try parsing as JSON array first (recommended format for args with commas)
- if (val.startsWith('[')) {
- try {
- const parsed = JSON.parse(val);
- if (Array.isArray(parsed)) return parsed;
- } catch (e) {
- // Fall through to comma-separated parsing
- }
- }
- // Parse as comma-separated (but be careful with args that contain commas)
- // For Chrome args, we split on comma followed by '--' to be safe
- if (val.includes(',--')) {
- return val.split(/,(?=--)/).map(s => s.trim()).filter(Boolean);
- }
- // If the value looks like a single Chrome flag (starts with -- and contains no ,--),
- // treat the entire value as a single argument to avoid splitting internal commas
- // e.g., '--user-data-dir=/path/with,comma/in/it' should not be split
- if (val.startsWith('--') && !val.includes(',--')) {
- return [val.trim()];
- }
- // Simple comma-separated (for non-Chrome-flag values like 'a,b,c')
- return val.split(',').map(s => s.trim()).filter(Boolean);
- }
- /**
- * Parse resolution string into width/height.
- * @param {string} resolution - Resolution string like "1440,2000"
- * @returns {{width: number, height: number}} - Parsed dimensions
- */
- function parseResolution(resolution) {
- const [width, height] = resolution.split(',').map(x => parseInt(x.trim(), 10));
- return { width: width || 1440, height: height || 2000 };
- }
- // ============================================================================
- // PID file management
- // ============================================================================
- /**
- * Write PID file with specific mtime for process validation.
- * @param {string} filePath - Path to PID file
- * @param {number} pid - Process ID
- * @param {number} startTimeSeconds - Process start time in seconds
- */
- function writePidWithMtime(filePath, pid, startTimeSeconds) {
- fs.writeFileSync(filePath, String(pid));
- const startTimeMs = startTimeSeconds * 1000;
- fs.utimesSync(filePath, new Date(startTimeMs), new Date(startTimeMs));
- }
- /**
- * Write a shell script that can re-run the Chrome command.
- * @param {string} filePath - Path to script file
- * @param {string} binary - Chrome binary path
- * @param {string[]} args - Chrome arguments
- */
- function writeCmdScript(filePath, binary, args) {
- const escape = (arg) =>
- arg.includes(' ') || arg.includes('"') || arg.includes('$')
- ? `"${arg.replace(/"/g, '\\"')}"`
- : arg;
- fs.writeFileSync(
- filePath,
- `#!/bin/bash\n${binary} ${args.map(escape).join(' ')}\n`
- );
- fs.chmodSync(filePath, 0o755);
- }
- // ============================================================================
- // Port management
- // ============================================================================
- /**
- * Find a free port on localhost.
- * @returns {Promise<number>} - Available port number
- */
- function findFreePort() {
- return new Promise((resolve, reject) => {
- const server = net.createServer();
- server.unref();
- server.on('error', reject);
- server.listen(0, () => {
- const port = server.address().port;
- server.close(() => resolve(port));
- });
- });
- }
- /**
- * Wait for Chrome's DevTools port to be ready.
- * @param {number} port - Debug port number
- * @param {number} [timeout=30000] - Timeout in milliseconds
- * @returns {Promise<Object>} - Chrome version info
- */
- function waitForDebugPort(port, timeout = 30000) {
- const startTime = Date.now();
- return new Promise((resolve, reject) => {
- const tryConnect = () => {
- if (Date.now() - startTime > timeout) {
- reject(new Error(`Timeout waiting for Chrome debug port ${port}`));
- return;
- }
- const req = http.get(`http://127.0.0.1:${port}/json/version`, (res) => {
- let data = '';
- res.on('data', (chunk) => (data += chunk));
- res.on('end', () => {
- try {
- const info = JSON.parse(data);
- resolve(info);
- } catch (e) {
- setTimeout(tryConnect, 100);
- }
- });
- });
- req.on('error', () => {
- setTimeout(tryConnect, 100);
- });
- req.setTimeout(1000, () => {
- req.destroy();
- setTimeout(tryConnect, 100);
- });
- };
- tryConnect();
- });
- }
- // ============================================================================
- // Zombie process cleanup
- // ============================================================================
- /**
- * Kill zombie Chrome processes from stale crawls.
- * Scans DATA_DIR/crawls/<crawl_id>/chrome/<name>.pid for stale processes.
- * @param {string} [dataDir] - Data directory (defaults to DATA_DIR env or '.')
- * @returns {number} - Number of zombies killed
- */
- function killZombieChrome(dataDir = null) {
- dataDir = dataDir || getEnv('DATA_DIR', '.');
- const crawlsDir = path.join(dataDir, 'crawls');
- const now = Date.now();
- const fiveMinutesAgo = now - 300000;
- let killed = 0;
- console.error('[*] Checking for zombie Chrome processes...');
- if (!fs.existsSync(crawlsDir)) {
- console.error('[+] No crawls directory found');
- return 0;
- }
- try {
- const crawls = fs.readdirSync(crawlsDir, { withFileTypes: true });
- for (const crawl of crawls) {
- if (!crawl.isDirectory()) continue;
- const crawlDir = path.join(crawlsDir, crawl.name);
- const chromeDir = path.join(crawlDir, 'chrome');
- if (!fs.existsSync(chromeDir)) continue;
- // Check if crawl was modified recently (still active)
- try {
- const crawlStats = fs.statSync(crawlDir);
- if (crawlStats.mtimeMs > fiveMinutesAgo) {
- continue;
- }
- } catch (e) {
- continue;
- }
- // Crawl is stale, check for PIDs
- try {
- const pidFiles = fs.readdirSync(chromeDir).filter(f => f.endsWith('.pid'));
- for (const pidFileName of pidFiles) {
- const pidFile = path.join(chromeDir, pidFileName);
- try {
- const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
- if (isNaN(pid) || pid <= 0) continue;
- // Check if process exists
- try {
- process.kill(pid, 0);
- } catch (e) {
- // Process dead, remove stale PID file
- try { fs.unlinkSync(pidFile); } catch (e) {}
- continue;
- }
- // Process alive and crawl is stale - zombie!
- console.error(`[!] Found zombie (PID ${pid}) from stale crawl ${crawl.name}`);
- try {
- try { process.kill(-pid, 'SIGKILL'); } catch (e) { process.kill(pid, 'SIGKILL'); }
- killed++;
- console.error(`[+] Killed zombie (PID ${pid})`);
- try { fs.unlinkSync(pidFile); } catch (e) {}
- } catch (e) {
- console.error(`[!] Failed to kill PID ${pid}: ${e.message}`);
- }
- } catch (e) {
- // Skip invalid PID files
- }
- }
- } catch (e) {
- // Skip if can't read chrome dir
- }
- }
- } catch (e) {
- console.error(`[!] Error scanning crawls: ${e.message}`);
- }
- if (killed > 0) {
- console.error(`[+] Killed ${killed} zombie process(es)`);
- } else {
- console.error('[+] No zombies found');
- }
- // Clean up stale SingletonLock files from persona chrome_user_data directories
- const personasDir = path.join(dataDir, 'personas');
- if (fs.existsSync(personasDir)) {
- try {
- const personas = fs.readdirSync(personasDir, { withFileTypes: true });
- for (const persona of personas) {
- if (!persona.isDirectory()) continue;
- const userDataDir = path.join(personasDir, persona.name, 'chrome_user_data');
- const singletonLock = path.join(userDataDir, 'SingletonLock');
- if (fs.existsSync(singletonLock)) {
- try {
- fs.unlinkSync(singletonLock);
- console.error(`[+] Removed stale SingletonLock: ${singletonLock}`);
- } catch (e) {
- // Ignore - may be in use by active Chrome
- }
- }
- }
- } catch (e) {
- // Ignore errors scanning personas directory
- }
- }
- return killed;
- }
- // ============================================================================
- // Chrome launching
- // ============================================================================
- /**
- * Launch Chromium with extensions and return connection info.
- *
- * @param {Object} options - Launch options
- * @param {string} [options.binary] - Chrome binary path (auto-detected if not provided)
- * @param {string} [options.outputDir='chrome'] - Directory for output files
- * @param {string} [options.userDataDir] - Chrome user data directory for persistent sessions
- * @param {string} [options.resolution='1440,2000'] - Window resolution
- * @param {boolean} [options.headless=true] - Run in headless mode
- * @param {boolean} [options.sandbox=true] - Enable Chrome sandbox
- * @param {boolean} [options.checkSsl=true] - Check SSL certificates
- * @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
- * @param {boolean} [options.killZombies=true] - Kill zombie processes first
- * @returns {Promise<Object>} - {success, cdpUrl, pid, port, process, error}
- */
- async function launchChromium(options = {}) {
- const {
- binary = findChromium(),
- outputDir = 'chrome',
- userDataDir = getEnv('CHROME_USER_DATA_DIR'),
- resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
- headless = getEnvBool('CHROME_HEADLESS', true),
- sandbox = getEnvBool('CHROME_SANDBOX', true),
- checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
- extensionPaths = [],
- killZombies = true,
- } = options;
- if (!binary) {
- return { success: false, error: 'Chrome binary not found' };
- }
- // Kill zombies first
- if (killZombies) {
- killZombieChrome();
- }
- const { width, height } = parseResolution(resolution);
- // Create output directory
- if (!fs.existsSync(outputDir)) {
- fs.mkdirSync(outputDir, { recursive: true });
- }
- // Create user data directory if specified and doesn't exist
- if (userDataDir) {
- if (!fs.existsSync(userDataDir)) {
- fs.mkdirSync(userDataDir, { recursive: true });
- console.error(`[*] Created user data directory: ${userDataDir}`);
- }
- // Clean up any stale SingletonLock file from previous crashed sessions
- const singletonLock = path.join(userDataDir, 'SingletonLock');
- if (fs.existsSync(singletonLock)) {
- try {
- fs.unlinkSync(singletonLock);
- console.error(`[*] Removed stale SingletonLock: ${singletonLock}`);
- } catch (e) {
- console.error(`[!] Failed to remove SingletonLock: ${e.message}`);
- }
- }
- }
- // Find a free port
- const debugPort = await findFreePort();
- console.error(`[*] Using debug port: ${debugPort}`);
- // Get base Chrome args from config (static flags from CHROME_ARGS env var)
- // These come from config.json defaults, merged by get_config() in Python
- const baseArgs = getEnvArray('CHROME_ARGS', []);
- // Get extra user-provided args
- const extraArgs = getEnvArray('CHROME_ARGS_EXTRA', []);
- // Build dynamic Chrome arguments (these must be computed at runtime)
- const dynamicArgs = [
- // Remote debugging setup
- `--remote-debugging-port=${debugPort}`,
- '--remote-debugging-address=127.0.0.1',
- // Sandbox settings (disable in Docker)
- ...(sandbox ? [] : ['--no-sandbox', '--disable-setuid-sandbox']),
- // Docker-specific workarounds
- '--disable-dev-shm-usage',
- '--disable-gpu',
- // Window size
- `--window-size=${width},${height}`,
- // User data directory (for persistent sessions with persona)
- ...(userDataDir ? [`--user-data-dir=${userDataDir}`] : []),
- // Headless mode
- ...(headless ? ['--headless=new'] : []),
- // SSL certificate checking
- ...(checkSsl ? [] : ['--ignore-certificate-errors']),
- ];
- // Combine all args: base (from config) + dynamic (runtime) + extra (user overrides)
- // Dynamic args come after base so they can override if needed
- const chromiumArgs = [...baseArgs, ...dynamicArgs, ...extraArgs];
- // Add extension loading flags
- if (extensionPaths.length > 0) {
- const extPathsArg = extensionPaths.join(',');
- chromiumArgs.push(`--load-extension=${extPathsArg}`);
- chromiumArgs.push('--enable-unsafe-extension-debugging');
- chromiumArgs.push('--disable-features=DisableLoadExtensionCommandLineSwitch,ExtensionManifestV2Unsupported,ExtensionManifestV2Disabled');
- console.error(`[*] Loading ${extensionPaths.length} extension(s) via --load-extension`);
- }
- chromiumArgs.push('about:blank');
- // Write command script for debugging
- writeCmdScript(path.join(outputDir, 'cmd.sh'), binary, chromiumArgs);
- try {
- console.error(`[*] Spawning Chromium (headless=${headless})...`);
- const chromiumProcess = spawn(binary, chromiumArgs, {
- stdio: ['ignore', 'pipe', 'pipe'],
- detached: true,
- });
- const chromePid = chromiumProcess.pid;
- const chromeStartTime = Date.now() / 1000;
- if (chromePid) {
- console.error(`[*] Chromium spawned (PID: ${chromePid})`);
- writePidWithMtime(path.join(outputDir, 'chrome.pid'), chromePid, chromeStartTime);
- }
- // Pipe Chrome output to stderr
- chromiumProcess.stdout.on('data', (data) => {
- process.stderr.write(`[chromium:stdout] ${data}`);
- });
- chromiumProcess.stderr.on('data', (data) => {
- process.stderr.write(`[chromium:stderr] ${data}`);
- });
- // Wait for debug port
- console.error(`[*] Waiting for debug port ${debugPort}...`);
- const versionInfo = await waitForDebugPort(debugPort, 30000);
- const wsUrl = versionInfo.webSocketDebuggerUrl;
- console.error(`[+] Chromium ready: ${wsUrl}`);
- fs.writeFileSync(path.join(outputDir, 'cdp_url.txt'), wsUrl);
- fs.writeFileSync(path.join(outputDir, 'port.txt'), String(debugPort));
- return {
- success: true,
- cdpUrl: wsUrl,
- pid: chromePid,
- port: debugPort,
- process: chromiumProcess,
- };
- } catch (e) {
- return { success: false, error: `${e.name}: ${e.message}` };
- }
- }
- /**
- * Check if a process is still running.
- * @param {number} pid - Process ID to check
- * @returns {boolean} - True if process exists
- */
- function isProcessAlive(pid) {
- try {
- process.kill(pid, 0); // Signal 0 checks existence without killing
- return true;
- } catch (e) {
- return false;
- }
- }
- /**
- * Find all Chrome child processes for a given debug port.
- * @param {number} port - Debug port number
- * @returns {Array<number>} - Array of PIDs
- */
- function findChromeProcessesByPort(port) {
- const { execSync } = require('child_process');
- const pids = [];
- try {
- // Find all Chrome processes using this debug port
- const output = execSync(
- `ps aux | grep -i "chrome.*--remote-debugging-port=${port}" | grep -v grep | awk '{print $2}'`,
- { encoding: 'utf8', timeout: 5000 }
- );
- for (const line of output.split('\n')) {
- const pid = parseInt(line.trim(), 10);
- if (!isNaN(pid) && pid > 0) {
- pids.push(pid);
- }
- }
- } catch (e) {
- // Command failed or no processes found
- }
- return pids;
- }
- /**
- * Kill a Chrome process by PID.
- * Always sends SIGTERM before SIGKILL, then verifies death.
- *
- * @param {number} pid - Process ID to kill
- * @param {string} [outputDir] - Directory containing PID files to clean up
- */
- async function killChrome(pid, outputDir = null) {
- if (!pid) return;
- console.error(`[*] Killing Chrome process tree (PID ${pid})...`);
- // Get debug port for finding child processes
- let debugPort = null;
- if (outputDir) {
- try {
- const portFile = path.join(outputDir, 'port.txt');
- if (fs.existsSync(portFile)) {
- debugPort = parseInt(fs.readFileSync(portFile, 'utf8').trim(), 10);
- }
- } catch (e) {}
- }
- // Step 1: SIGTERM to process group (graceful shutdown)
- console.error(`[*] Sending SIGTERM to process group -${pid}...`);
- try {
- process.kill(-pid, 'SIGTERM');
- } catch (e) {
- try {
- console.error(`[*] Process group kill failed, trying single process...`);
- process.kill(pid, 'SIGTERM');
- } catch (e2) {
- console.error(`[!] SIGTERM failed: ${e2.message}`);
- }
- }
- // Step 2: Wait for graceful shutdown
- await new Promise(resolve => setTimeout(resolve, 2000));
- // Step 3: Check if still alive
- if (!isProcessAlive(pid)) {
- console.error('[+] Chrome process terminated gracefully');
- } else {
- // Step 4: Force kill ENTIRE process group with SIGKILL
- console.error(`[*] Process still alive, sending SIGKILL to process group -${pid}...`);
- try {
- process.kill(-pid, 'SIGKILL'); // Kill entire process group
- } catch (e) {
- console.error(`[!] Process group SIGKILL failed, trying single process: ${e.message}`);
- try {
- process.kill(pid, 'SIGKILL');
- } catch (e2) {
- console.error(`[!] SIGKILL failed: ${e2.message}`);
- }
- }
- // Step 5: Wait briefly and verify death
- await new Promise(resolve => setTimeout(resolve, 1000));
- if (isProcessAlive(pid)) {
- console.error(`[!] WARNING: Process ${pid} is unkillable (likely in UNE state)`);
- console.error(`[!] This typically happens when Chrome crashes in kernel syscall`);
- console.error(`[!] Process will remain as zombie until system reboot`);
- console.error(`[!] macOS IOSurface crash creates unkillable processes in UNE state`);
- // Try one more time to kill the entire process group
- if (debugPort) {
- const relatedPids = findChromeProcessesByPort(debugPort);
- if (relatedPids.length > 1) {
- console.error(`[*] Found ${relatedPids.length} Chrome processes still running on port ${debugPort}`);
- console.error(`[*] Attempting final process group SIGKILL...`);
- // Try to kill each unique process group we find
- const processGroups = new Set();
- for (const relatedPid of relatedPids) {
- if (relatedPid !== pid) {
- processGroups.add(relatedPid);
- }
- }
- for (const groupPid of processGroups) {
- try {
- process.kill(-groupPid, 'SIGKILL');
- } catch (e) {}
- }
- }
- }
- } else {
- console.error('[+] Chrome process group killed successfully');
- }
- }
- // Step 8: Clean up PID files
- if (outputDir) {
- try { fs.unlinkSync(path.join(outputDir, 'chrome.pid')); } catch (e) {}
- try { fs.unlinkSync(path.join(outputDir, 'hook.pid')); } catch (e) {}
- }
- console.error('[*] Chrome cleanup completed');
- }
- /**
- * Install Chromium using @puppeteer/browsers programmatic API.
- * Uses puppeteer's default cache location, returns the binary path.
- *
- * @param {Object} options - Install options
- * @returns {Promise<Object>} - {success, binary, version, error}
- */
- async function installChromium(options = {}) {
- // Check if CHROME_BINARY is already set and valid
- const configuredBinary = getEnv('CHROME_BINARY');
- if (configuredBinary && fs.existsSync(configuredBinary)) {
- console.error(`[+] Using configured CHROME_BINARY: ${configuredBinary}`);
- return { success: true, binary: configuredBinary, version: null };
- }
- // Try to load @puppeteer/browsers from NODE_MODULES_DIR or system
- let puppeteerBrowsers;
- try {
- if (process.env.NODE_MODULES_DIR) {
- module.paths.unshift(process.env.NODE_MODULES_DIR);
- }
- puppeteerBrowsers = require('@puppeteer/browsers');
- } catch (e) {
- console.error(`[!] @puppeteer/browsers not found. Install it first with installPuppeteerCore.`);
- return { success: false, error: '@puppeteer/browsers not installed' };
- }
- console.error(`[*] Installing Chromium via @puppeteer/browsers...`);
- try {
- const result = await puppeteerBrowsers.install({
- browser: 'chromium',
- buildId: 'latest',
- });
- const binary = result.executablePath;
- const version = result.buildId;
- if (!binary || !fs.existsSync(binary)) {
- console.error(`[!] Chromium binary not found at: ${binary}`);
- return { success: false, error: `Chromium binary not found at: ${binary}` };
- }
- console.error(`[+] Chromium installed: ${binary}`);
- return { success: true, binary, version };
- } catch (e) {
- console.error(`[!] Failed to install Chromium: ${e.message}`);
- return { success: false, error: e.message };
- }
- }
- /**
- * Install puppeteer-core npm package.
- *
- * @param {Object} options - Install options
- * @param {string} [options.npmPrefix] - npm prefix directory (default: DATA_DIR/lib/<arch>/npm or ./node_modules parent)
- * @param {number} [options.timeout=60000] - Timeout in milliseconds
- * @returns {Promise<Object>} - {success, path, error}
- */
- async function installPuppeteerCore(options = {}) {
- const arch = `${process.arch}-${process.platform}`;
- const defaultPrefix = path.join(getEnv('LIB_DIR', getEnv('DATA_DIR', '.')), 'npm');
- const {
- npmPrefix = defaultPrefix,
- timeout = 60000,
- } = options;
- const nodeModulesDir = path.join(npmPrefix, 'node_modules');
- const puppeteerPath = path.join(nodeModulesDir, 'puppeteer-core');
- // Check if already installed
- if (fs.existsSync(puppeteerPath)) {
- console.error(`[+] puppeteer-core already installed: ${puppeteerPath}`);
- return { success: true, path: puppeteerPath };
- }
- console.error(`[*] Installing puppeteer-core to ${npmPrefix}...`);
- // Create directory
- if (!fs.existsSync(npmPrefix)) {
- fs.mkdirSync(npmPrefix, { recursive: true });
- }
- try {
- const { execSync } = require('child_process');
- execSync(
- `npm install --prefix "${npmPrefix}" puppeteer-core`,
- { encoding: 'utf8', timeout, stdio: ['pipe', 'pipe', 'pipe'] }
- );
- console.error(`[+] puppeteer-core installed successfully`);
- return { success: true, path: puppeteerPath };
- } catch (e) {
- console.error(`[!] Failed to install puppeteer-core: ${e.message}`);
- return { success: false, error: e.message };
- }
- }
- // Try to import unzipper, fallback to system unzip if not available
- let unzip = null;
- try {
- const unzipper = require('unzipper');
- unzip = async (sourcePath, destPath) => {
- const stream = fs.createReadStream(sourcePath).pipe(unzipper.Extract({ path: destPath }));
- return stream.promise();
- };
- } catch (err) {
- // Will use system unzip command as fallback
- }
- /**
- * Compute the extension ID from the unpacked path.
- * Chrome uses a SHA256 hash of the unpacked extension directory path to compute a dynamic id.
- *
- * @param {string} unpacked_path - Path to the unpacked extension directory
- * @returns {string} - 32-character extension ID
- */
- function getExtensionId(unpacked_path) {
- // Chrome uses a SHA256 hash of the unpacked extension directory path
- const hash = crypto.createHash('sha256');
- hash.update(Buffer.from(unpacked_path, 'utf-8'));
- // Convert first 32 hex chars to characters in the range 'a'-'p'
- const detected_extension_id = Array.from(hash.digest('hex'))
- .slice(0, 32)
- .map(i => String.fromCharCode(parseInt(i, 16) + 'a'.charCodeAt(0)))
- .join('');
- return detected_extension_id;
- }
- /**
- * Download and install a Chrome extension from the Chrome Web Store.
- *
- * @param {Object} extension - Extension metadata object
- * @param {string} extension.webstore_id - Chrome Web Store extension ID
- * @param {string} extension.name - Human-readable extension name
- * @param {string} extension.crx_url - URL to download the CRX file
- * @param {string} extension.crx_path - Local path to save the CRX file
- * @param {string} extension.unpacked_path - Path to extract the extension
- * @returns {Promise<boolean>} - True if installation succeeded
- */
- async function installExtension(extension) {
- const manifest_path = path.join(extension.unpacked_path, 'manifest.json');
- // Download CRX file if not already downloaded
- if (!fs.existsSync(manifest_path) && !fs.existsSync(extension.crx_path)) {
- console.log(`[🛠️] Downloading missing extension ${extension.name} ${extension.webstore_id} -> ${extension.crx_path}`);
- try {
- // Ensure parent directory exists
- const crxDir = path.dirname(extension.crx_path);
- if (!fs.existsSync(crxDir)) {
- fs.mkdirSync(crxDir, { recursive: true });
- }
- // Download CRX file from Chrome Web Store
- const response = await fetch(extension.crx_url);
- if (!response.ok) {
- console.warn(`[⚠️] Failed to download extension ${extension.name}: HTTP ${response.status}`);
- return false;
- }
- if (response.body) {
- const crx_file = fs.createWriteStream(extension.crx_path);
- const crx_stream = Readable.fromWeb(response.body);
- await finished(crx_stream.pipe(crx_file));
- } else {
- console.warn(`[⚠️] Failed to download extension ${extension.name}: No response body`);
- return false;
- }
- } catch (err) {
- console.error(`[❌] Failed to download extension ${extension.name}:`, err);
- return false;
- }
- }
- // Unzip CRX file to unpacked_path (CRX files have extra header bytes but unzip handles it)
- await fs.promises.mkdir(extension.unpacked_path, { recursive: true });
- try {
- // Use -q to suppress warnings about extra bytes in CRX header
- await execAsync(`/usr/bin/unzip -q -o "${extension.crx_path}" -d "${extension.unpacked_path}"`);
- } catch (err1) {
- // unzip may return non-zero even on success due to CRX header warning, check if manifest exists
- if (!fs.existsSync(manifest_path)) {
- if (unzip) {
- // Fallback to unzipper library
- try {
- await unzip(extension.crx_path, extension.unpacked_path);
- } catch (err2) {
- console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err2.message);
- return false;
- }
- } else {
- console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err1.message);
- return false;
- }
- }
- }
- if (!fs.existsSync(manifest_path)) {
- console.error(`[❌] Failed to install ${extension.crx_path}: could not find manifest.json in unpacked_path`);
- return false;
- }
- return true;
- }
- /**
- * Load or install a Chrome extension, computing all metadata.
- *
- * @param {Object} ext - Partial extension metadata (at minimum: webstore_id or unpacked_path)
- * @param {string} [ext.webstore_id] - Chrome Web Store extension ID
- * @param {string} [ext.name] - Human-readable extension name
- * @param {string} [ext.unpacked_path] - Path to unpacked extension
- * @param {string} [extensions_dir] - Directory to store extensions
- * @returns {Promise<Object>} - Complete extension metadata object
- */
- async function loadOrInstallExtension(ext, extensions_dir = null) {
- if (!(ext.webstore_id || ext.unpacked_path)) {
- throw new Error('Extension must have either {webstore_id} or {unpacked_path}');
- }
- // Determine extensions directory
- const EXTENSIONS_DIR = extensions_dir || process.env.CHROME_EXTENSIONS_DIR || './data/chrome_extensions';
- // Set statically computable extension metadata
- ext.webstore_id = ext.webstore_id || ext.id;
- ext.name = ext.name || ext.webstore_id;
- ext.webstore_url = ext.webstore_url || `https://chromewebstore.google.com/detail/${ext.webstore_id}`;
- ext.crx_url = ext.crx_url || `https://clients2.google.com/service/update2/crx?response=redirect&prodversion=1230&acceptformat=crx3&x=id%3D${ext.webstore_id}%26uc`;
- ext.crx_path = ext.crx_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}.crx`);
- ext.unpacked_path = ext.unpacked_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}`);
- const manifest_path = path.join(ext.unpacked_path, 'manifest.json');
- ext.read_manifest = () => JSON.parse(fs.readFileSync(manifest_path, 'utf-8'));
- ext.read_version = () => fs.existsSync(manifest_path) && ext.read_manifest()?.version || null;
- // If extension is not installed, download and unpack it
- if (!ext.read_version()) {
- await installExtension(ext);
- }
- // Autodetect ID from filesystem path (unpacked extensions don't have stable IDs)
- ext.id = getExtensionId(ext.unpacked_path);
- ext.version = ext.read_version();
- if (!ext.version) {
- console.warn(`[❌] Unable to detect ID and version of installed extension ${ext.unpacked_path}`);
- } else {
- console.log(`[➕] Installed extension ${ext.name} (${ext.version})... ${ext.unpacked_path}`);
- }
- return ext;
- }
- /**
- * Check if a Puppeteer target is an extension background page/service worker.
- *
- * @param {Object} target - Puppeteer target object
- * @returns {Promise<Object>} - Object with target_is_bg, extension_id, manifest_version, etc.
- */
- async function isTargetExtension(target) {
- let target_type;
- let target_ctx;
- let target_url;
- try {
- target_type = target.type();
- target_ctx = (await target.worker()) || (await target.page()) || null;
- target_url = target.url() || target_ctx?.url() || null;
- } catch (err) {
- if (String(err).includes('No target with given id found')) {
- // Target closed during check, ignore harmless race condition
- target_type = 'closed';
- target_ctx = null;
- target_url = 'about:closed';
- } else {
- throw err;
- }
- }
- // Check if this is an extension background page or service worker
- const is_chrome_extension = target_url?.startsWith('chrome-extension://');
- const is_background_page = target_type === 'background_page';
- const is_service_worker = target_type === 'service_worker';
- const target_is_bg = is_chrome_extension && (is_background_page || is_service_worker);
- let extension_id = null;
- let manifest_version = null;
- const target_is_extension = is_chrome_extension || target_is_bg;
- if (target_is_extension) {
- try {
- extension_id = target_url?.split('://')[1]?.split('/')[0] || null;
- if (target_ctx) {
- const manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
- manifest_version = manifest?.manifest_version || null;
- }
- } catch (err) {
- // Failed to get extension metadata
- }
- }
- return {
- target_is_extension,
- target_is_bg,
- target_type,
- target_ctx,
- target_url,
- extension_id,
- manifest_version,
- };
- }
- /**
- * Load extension metadata and connection handlers from a browser target.
- *
- * @param {Array} extensions - Array of extension metadata objects to update
- * @param {Object} target - Puppeteer target object
- * @returns {Promise<Object|null>} - Updated extension object or null if not an extension
- */
- async function loadExtensionFromTarget(extensions, target) {
- const {
- target_is_bg,
- target_is_extension,
- target_type,
- target_ctx,
- target_url,
- extension_id,
- manifest_version,
- } = await isTargetExtension(target);
- if (!(target_is_bg && extension_id && target_ctx)) {
- return null;
- }
- // Find matching extension in our list
- const extension = extensions.find(ext => ext.id === extension_id);
- if (!extension) {
- console.warn(`[⚠️] Found loaded extension ${extension_id} that's not in CHROME_EXTENSIONS list`);
- return null;
- }
- // Load manifest from the extension context
- let manifest = null;
- try {
- manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
- } catch (err) {
- console.error(`[❌] Failed to read manifest for extension ${extension_id}:`, err);
- return null;
- }
- // Create dispatch methods for communicating with the extension
- const new_extension = {
- ...extension,
- target,
- target_type,
- target_url,
- manifest,
- manifest_version,
- // Trigger extension toolbar button click
- dispatchAction: async (tab) => {
- return await target_ctx.evaluate((tabId) => {
- return new Promise((resolve) => {
- chrome.action.onClicked.addListener((tab) => {
- resolve({ success: true, tab });
- });
- chrome.action.openPopup();
- });
- }, tab?.id || null);
- },
- // Send message to extension
- dispatchMessage: async (message, options = {}) => {
- return await target_ctx.evaluate((msg, opts) => {
- return new Promise((resolve) => {
- chrome.runtime.sendMessage(msg, opts, (response) => {
- resolve(response);
- });
- });
- }, message, options);
- },
- // Trigger extension command (keyboard shortcut)
- dispatchCommand: async (command) => {
- return await target_ctx.evaluate((cmd) => {
- return new Promise((resolve) => {
- chrome.commands.onCommand.addListener((receivedCommand) => {
- if (receivedCommand === cmd) {
- resolve({ success: true, command: receivedCommand });
- }
- });
- // Note: Actually triggering commands programmatically is not directly supported
- // This would need to be done via CDP or keyboard simulation
- });
- }, command);
- },
- };
- // Update the extension in the array
- Object.assign(extension, new_extension);
- console.log(`[🔌] Connected to extension ${extension.name} (${extension.version})`);
- return new_extension;
- }
- /**
- * Install all extensions in the list if not already installed.
- *
- * @param {Array} extensions - Array of extension metadata objects
- * @param {string} [extensions_dir] - Directory to store extensions
- * @returns {Promise<Array>} - Array of installed extension objects
- */
- async function installAllExtensions(extensions, extensions_dir = null) {
- console.log(`[⚙️] Installing ${extensions.length} chrome extensions...`);
- for (const extension of extensions) {
- await loadOrInstallExtension(extension, extensions_dir);
- }
- return extensions;
- }
- /**
- * Load and connect to all extensions from a running browser.
- *
- * @param {Object} browser - Puppeteer browser instance
- * @param {Array} extensions - Array of extension metadata objects
- * @returns {Promise<Array>} - Array of loaded extension objects with connection handlers
- */
- async function loadAllExtensionsFromBrowser(browser, extensions) {
- console.log(`[⚙️] Loading ${extensions.length} chrome extensions from browser...`);
- // Find loaded extensions at runtime by examining browser targets
- for (const target of browser.targets()) {
- await loadExtensionFromTarget(extensions, target);
- }
- return extensions;
- }
- /**
- * Load extension manifest.json file
- *
- * @param {string} unpacked_path - Path to unpacked extension directory
- * @returns {object|null} - Parsed manifest object or null if not found/invalid
- */
- function loadExtensionManifest(unpacked_path) {
- const manifest_path = path.join(unpacked_path, 'manifest.json');
- if (!fs.existsSync(manifest_path)) {
- return null;
- }
- try {
- const manifest_content = fs.readFileSync(manifest_path, 'utf-8');
- return JSON.parse(manifest_content);
- } catch (error) {
- // Invalid JSON or read error
- return null;
- }
- }
- /**
- * @deprecated Use puppeteer's enableExtensions option instead.
- *
- * Generate Chrome launch arguments for loading extensions.
- * NOTE: This is deprecated. Use puppeteer.launch({ pipe: true, enableExtensions: [paths] }) instead.
- *
- * @param {Array} extensions - Array of extension metadata objects
- * @returns {Array<string>} - Chrome CLI arguments for loading extensions
- */
- function getExtensionLaunchArgs(extensions) {
- console.warn('[DEPRECATED] getExtensionLaunchArgs is deprecated. Use puppeteer enableExtensions option instead.');
- if (!extensions || extensions.length === 0) {
- return [];
- }
- // Filter out extensions without unpacked_path first
- const validExtensions = extensions.filter(ext => ext.unpacked_path);
- const unpacked_paths = validExtensions.map(ext => ext.unpacked_path);
- // Use computed id (from path hash) for allowlisting, as that's what Chrome uses for unpacked extensions
- // Fall back to webstore_id if computed id not available
- const extension_ids = validExtensions.map(ext => ext.id || getExtensionId(ext.unpacked_path));
- return [
- `--load-extension=${unpacked_paths.join(',')}`,
- `--allowlisted-extension-id=${extension_ids.join(',')}`,
- '--allow-legacy-extension-manifests',
- '--disable-extensions-auto-update',
- ];
- }
- /**
- * Get extension paths for use with puppeteer's enableExtensions option.
- * Following puppeteer best practices: https://pptr.dev/guides/chrome-extensions
- *
- * @param {Array} extensions - Array of extension metadata objects
- * @returns {Array<string>} - Array of extension unpacked paths
- */
- function getExtensionPaths(extensions) {
- if (!extensions || extensions.length === 0) {
- return [];
- }
- return extensions
- .filter(ext => ext.unpacked_path)
- .map(ext => ext.unpacked_path);
- }
- /**
- * Wait for an extension target to be available in the browser.
- * Following puppeteer best practices for accessing extension contexts.
- *
- * For Manifest V3 extensions (service workers):
- * const worker = await waitForExtensionTarget(browser, extensionId);
- * // worker is a WebWorker context
- *
- * For Manifest V2 extensions (background pages):
- * const page = await waitForExtensionTarget(browser, extensionId);
- * // page is a Page context
- *
- * @param {Object} browser - Puppeteer browser instance
- * @param {string} extensionId - Extension ID to wait for (computed from path hash)
- * @param {number} [timeout=30000] - Timeout in milliseconds
- * @returns {Promise<Object>} - Worker or Page context for the extension
- */
- async function waitForExtensionTarget(browser, extensionId, timeout = 30000) {
- // Try to find service worker first (Manifest V3)
- try {
- const workerTarget = await browser.waitForTarget(
- target => target.type() === 'service_worker' &&
- target.url().includes(`chrome-extension://${extensionId}`),
- { timeout }
- );
- const worker = await workerTarget.worker();
- if (worker) return worker;
- } catch (err) {
- // No service worker found, try background page
- }
- // Try background page (Manifest V2)
- try {
- const backgroundTarget = await browser.waitForTarget(
- target => target.type() === 'background_page' &&
- target.url().includes(`chrome-extension://${extensionId}`),
- { timeout }
- );
- const page = await backgroundTarget.page();
- if (page) return page;
- } catch (err) {
- // No background page found
- }
- // Try any extension page as fallback
- const extTarget = await browser.waitForTarget(
- target => target.url().startsWith(`chrome-extension://${extensionId}`),
- { timeout }
- );
- // Return worker or page depending on target type
- if (extTarget.type() === 'service_worker') {
- return await extTarget.worker();
- }
- return await extTarget.page();
- }
- /**
- * Get all loaded extension targets from a browser.
- *
- * @param {Object} browser - Puppeteer browser instance
- * @returns {Array<Object>} - Array of extension target info objects
- */
- function getExtensionTargets(browser) {
- return browser.targets()
- .filter(target =>
- target.url().startsWith('chrome-extension://') ||
- target.type() === 'service_worker' ||
- target.type() === 'background_page'
- )
- .map(target => ({
- type: target.type(),
- url: target.url(),
- extensionId: target.url().includes('chrome-extension://')
- ? target.url().split('chrome-extension://')[1]?.split('/')[0]
- : null,
- }));
- }
- /**
- * Find Chromium/Chrome binary path.
- * Checks CHROME_BINARY env var first, then falls back to system locations.
- *
- * @returns {string|null} - Absolute path to browser binary or null if not found
- */
- function findChromium() {
- const { execSync } = require('child_process');
- // Helper to validate a binary by running --version
- const validateBinary = (binaryPath) => {
- if (!binaryPath || !fs.existsSync(binaryPath)) return false;
- try {
- execSync(`"${binaryPath}" --version`, { encoding: 'utf8', timeout: 5000, stdio: 'pipe' });
- return true;
- } catch (e) {
- return false;
- }
- };
- // 1. Check CHROME_BINARY env var first
- const chromeBinary = getEnv('CHROME_BINARY');
- if (chromeBinary) {
- const absPath = path.resolve(chromeBinary);
- if (validateBinary(absPath)) {
- return absPath;
- }
- console.error(`[!] Warning: CHROME_BINARY="${chromeBinary}" is not valid`);
- }
- // 2. Warn that no CHROME_BINARY is configured, searching fallbacks
- if (!chromeBinary) {
- console.error('[!] Warning: CHROME_BINARY not set, searching system locations...');
- }
- // Helper to find Chromium in @puppeteer/browsers directory structure
- const findInPuppeteerDir = (baseDir) => {
- if (!fs.existsSync(baseDir)) return null;
- try {
- const versions = fs.readdirSync(baseDir);
- for (const version of versions.sort().reverse()) {
- const versionDir = path.join(baseDir, version);
- const candidates = [
- path.join(versionDir, 'chrome-mac-arm64/Chromium.app/Contents/MacOS/Chromium'),
- path.join(versionDir, 'chrome-mac/Chromium.app/Contents/MacOS/Chromium'),
- path.join(versionDir, 'chrome-mac-x64/Chromium.app/Contents/MacOS/Chromium'),
- path.join(versionDir, 'chrome-linux64/chrome'),
- path.join(versionDir, 'chrome-linux/chrome'),
- ];
- for (const c of candidates) {
- if (fs.existsSync(c)) return c;
- }
- }
- } catch (e) {}
- return null;
- };
- // 3. Search fallback locations (Chromium first, then Chrome)
- const fallbackLocations = [
- // System Chromium
- '/Applications/Chromium.app/Contents/MacOS/Chromium',
- '/usr/bin/chromium',
- '/usr/bin/chromium-browser',
- // Puppeteer cache
- path.join(process.env.HOME || '', '.cache/puppeteer/chromium'),
- path.join(process.env.HOME || '', '.cache/puppeteer'),
- // Chrome (fallback - extensions may not work in 137+)
- '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
- '/usr/bin/google-chrome',
- '/usr/bin/google-chrome-stable',
- ];
- for (const loc of fallbackLocations) {
- // Check if it's a puppeteer cache dir
- if (loc.includes('.cache/puppeteer')) {
- const binary = findInPuppeteerDir(loc);
- if (binary && validateBinary(binary)) {
- return binary;
- }
- } else if (validateBinary(loc)) {
- if (loc.includes('Google Chrome') || loc.includes('google-chrome')) {
- console.error('[!] Warning: Using Chrome instead of Chromium. Extension loading may not work in Chrome 137+');
- }
- return loc;
- }
- }
- return null;
- }
- // Export all functions
- module.exports = {
- // Environment helpers
- getEnv,
- getEnvBool,
- getEnvInt,
- getEnvArray,
- parseResolution,
- // PID file management
- writePidWithMtime,
- writeCmdScript,
- // Port management
- findFreePort,
- waitForDebugPort,
- // Zombie cleanup
- killZombieChrome,
- // Chrome launching
- launchChromium,
- killChrome,
- // Chrome/Chromium install
- installChromium,
- installPuppeteerCore,
- // Chrome/Chromium binary finding
- findChromium,
- // Extension utilities
- getExtensionId,
- loadExtensionManifest,
- installExtension,
- loadOrInstallExtension,
- isTargetExtension,
- loadExtensionFromTarget,
- installAllExtensions,
- loadAllExtensionsFromBrowser,
- // New puppeteer best-practices helpers
- getExtensionPaths,
- waitForExtensionTarget,
- getExtensionTargets,
- // Deprecated - use enableExtensions option instead
- getExtensionLaunchArgs,
- };
- // CLI usage
- if (require.main === module) {
- const args = process.argv.slice(2);
- if (args.length === 0) {
- console.log('Usage: chrome_utils.js <command> [args...]');
- console.log('');
- console.log('Commands:');
- console.log(' findChromium');
- console.log(' installChromium');
- console.log(' installPuppeteerCore [npm_prefix]');
- console.log(' launchChromium [output_dir] [extension_paths_json]');
- console.log(' killChrome <pid> [output_dir]');
- console.log(' killZombieChrome [data_dir]');
- console.log(' getExtensionId <path>');
- console.log(' loadExtensionManifest <path>');
- console.log(' getExtensionLaunchArgs <extensions_json>');
- console.log(' loadOrInstallExtension <webstore_id> <name> [extensions_dir]');
- process.exit(1);
- }
- const [command, ...commandArgs] = args;
- (async () => {
- try {
- switch (command) {
- case 'findChromium': {
- const binary = findChromium();
- if (binary) {
- console.log(binary);
- } else {
- console.error('Chromium binary not found');
- process.exit(1);
- }
- break;
- }
- case 'installChromium': {
- const result = await installChromium();
- if (result.success) {
- console.log(JSON.stringify({
- binary: result.binary,
- version: result.version,
- }));
- } else {
- console.error(result.error);
- process.exit(1);
- }
- break;
- }
- case 'installPuppeteerCore': {
- const [npmPrefix] = commandArgs;
- const result = await installPuppeteerCore({ npmPrefix: npmPrefix || undefined });
- if (result.success) {
- console.log(JSON.stringify({ path: result.path }));
- } else {
- console.error(result.error);
- process.exit(1);
- }
- break;
- }
- case 'launchChromium': {
- const [outputDir, extensionPathsJson] = commandArgs;
- const extensionPaths = extensionPathsJson ? JSON.parse(extensionPathsJson) : [];
- const result = await launchChromium({
- outputDir: outputDir || 'chrome',
- extensionPaths,
- });
- if (result.success) {
- console.log(JSON.stringify({
- cdpUrl: result.cdpUrl,
- pid: result.pid,
- port: result.port,
- }));
- } else {
- console.error(result.error);
- process.exit(1);
- }
- break;
- }
- case 'killChrome': {
- const [pidStr, outputDir] = commandArgs;
- const pid = parseInt(pidStr, 10);
- if (isNaN(pid)) {
- console.error('Invalid PID');
- process.exit(1);
- }
- await killChrome(pid, outputDir);
- break;
- }
- case 'killZombieChrome': {
- const [dataDir] = commandArgs;
- const killed = killZombieChrome(dataDir);
- console.log(killed);
- break;
- }
- case 'getExtensionId': {
- const [unpacked_path] = commandArgs;
- const id = getExtensionId(unpacked_path);
- console.log(id);
- break;
- }
- case 'loadExtensionManifest': {
- const [unpacked_path] = commandArgs;
- const manifest = loadExtensionManifest(unpacked_path);
- console.log(JSON.stringify(manifest));
- break;
- }
- case 'getExtensionLaunchArgs': {
- const [extensions_json] = commandArgs;
- const extensions = JSON.parse(extensions_json);
- const launchArgs = getExtensionLaunchArgs(extensions);
- console.log(JSON.stringify(launchArgs));
- break;
- }
- case 'loadOrInstallExtension': {
- const [webstore_id, name, extensions_dir] = commandArgs;
- const ext = await loadOrInstallExtension({ webstore_id, name }, extensions_dir);
- console.log(JSON.stringify(ext, null, 2));
- break;
- }
- default:
- console.error(`Unknown command: ${command}`);
- process.exit(1);
- }
- } catch (error) {
- console.error(`Error: ${error.message}`);
- process.exit(1);
- }
- })();
- }
|