chrome_utils.js 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508
  1. #!/usr/bin/env node
  2. /**
  3. * Chrome Extension Management Utilities
  4. *
  5. * Handles downloading, installing, and managing Chrome extensions for browser automation.
  6. * Ported from the TypeScript implementation in archivebox.ts
  7. */
  8. const fs = require('fs');
  9. const path = require('path');
  10. const crypto = require('crypto');
  11. const http = require('http');
  12. const net = require('net');
  13. const { exec, spawn } = require('child_process');
  14. const { promisify } = require('util');
  15. const { Readable } = require('stream');
  16. const { finished } = require('stream/promises');
  17. const execAsync = promisify(exec);
  18. // ============================================================================
  19. // Environment helpers
  20. // ============================================================================
  /**
   * Read an environment variable as a trimmed string.
   *
   * An unset or empty variable falls back to `defaultValue`; the chosen
   * value is trimmed before it is returned.
   *
   * @param {string} name - Environment variable name
   * @param {string} [defaultValue=''] - Fallback used when the variable is unset or empty
   * @returns {string} - Trimmed value (or trimmed fallback)
   */
  function getEnv(name, defaultValue = '') {
    const raw = process.env[name];
    const chosen = raw ? raw : defaultValue;
    return chosen.trim();
  }
  /**
   * Read an environment variable as a boolean.
   *
   * Recognised spellings (case-insensitive): true/1/yes/on and false/0/no/off.
   * Anything else, including an unset variable, yields `defaultValue`.
   *
   * @param {string} name - Environment variable name
   * @param {boolean} [defaultValue=false] - Result for unset or unrecognised values
   * @returns {boolean} - Parsed boolean
   */
  function getEnvBool(name, defaultValue = false) {
    const normalized = getEnv(name, '').toLowerCase();
    switch (normalized) {
      case 'true':
      case '1':
      case 'yes':
      case 'on':
        return true;
      case 'false':
      case '0':
      case 'no':
      case 'off':
        return false;
      default:
        return defaultValue;
    }
  }
  /**
   * Read an environment variable as a base-10 integer.
   *
   * @param {string} name - Environment variable name
   * @param {number} [defaultValue=0] - Result when the variable is unset or not numeric
   * @returns {number} - Parsed integer, or `defaultValue` when parsing yields NaN
   */
  function getEnvInt(name, defaultValue = 0) {
    const text = getEnv(name, String(defaultValue));
    const parsed = parseInt(text, 10);
    if (isNaN(parsed)) {
      return defaultValue;
    }
    return parsed;
  }
  /**
   * Read an environment variable as a list of strings.
   *
   * Formats are tried in this order:
   * 1. JSON array (recommended): '["--arg1=value", "--arg2=value,with,commas"]'
   * 2. Flags separated by a comma before `--`: '--arg1=value,--arg2=value'
   * 3. A single Chrome flag, kept whole: '--flag=value,with,commas'
   * 4. Plain comma-separated values: 'a,b,c'
   *
   * Use the JSON form for Chrome arguments that contain commas themselves:
   * CHROME_ARGS='["--user-data-dir=/path/with,comma", "--window-size=1440,900"]'
   *
   * @param {string} name - Environment variable name
   * @param {string[]} [defaultValue=[]] - Returned as-is when the variable is unset or empty
   * @returns {string[]} - Parsed items (a JSON array is returned exactly as parsed)
   */
  function getEnvArray(name, defaultValue = []) {
    const raw = getEnv(name, '');
    if (raw === '') {
      return defaultValue;
    }

    // Trim every piece and discard the empty ones.
    const tidy = (pieces) => pieces.map((piece) => piece.trim()).filter(Boolean);

    if (raw.startsWith('[')) {
      try {
        const decoded = JSON.parse(raw);
        if (Array.isArray(decoded)) {
          return decoded;
        }
      } catch (e) {
        // Not valid JSON: treat the value as a delimited string below.
      }
    }

    // Several flags: split only on commas that are directly followed by `--`,
    // so commas inside a flag's value stay intact.
    if (raw.includes(',--')) {
      return tidy(raw.split(/,(?=--)/));
    }

    // Exactly one flag (no ',--' left at this point): keep internal commas.
    if (raw.startsWith('--')) {
      return [raw.trim()];
    }

    // Anything else is an ordinary comma-separated list.
    return tidy(raw.split(','));
  }
  /**
   * Parse a resolution string into width/height.
   *
   * Accepts "WIDTH,HEIGHT" as well as "WIDTHxHEIGHT" (case-insensitive `x`).
   * A missing, non-numeric, zero or negative component falls back to its
   * default (1440 wide, 2000 high). Non-string input such as `undefined`
   * yields the defaults instead of throwing.
   *
   * @param {string} resolution - Resolution string like "1440,2000" or "1440x2000"
   * @returns {{width: number, height: number}} - Parsed dimensions
   */
  function parseResolution(resolution) {
    const DEFAULT_WIDTH = 1440;
    const DEFAULT_HEIGHT = 2000;
    const text = resolution == null ? '' : String(resolution);
    const [width, height] = text
      .split(/[,x]/i)
      .map((part) => parseInt(part.trim(), 10));
    return {
      // `NaN > 0` is false, so unparsable parts use the default as well.
      width: width > 0 ? width : DEFAULT_WIDTH,
      height: height > 0 ? height : DEFAULT_HEIGHT,
    };
  }
  103. // ============================================================================
  104. // PID file management
  105. // ============================================================================
  /**
   * Write a PID file and stamp it with the process start time.
   *
   * The file's access and modification times are both set to
   * `startTimeSeconds`, so the mtime records when the process started.
   *
   * @param {string} filePath - Path to PID file
   * @param {number} pid - Process ID
   * @param {number} startTimeSeconds - Process start time in seconds since the epoch
   */
  function writePidWithMtime(filePath, pid, startTimeSeconds) {
    const startedAt = new Date(startTimeSeconds * 1000);
    fs.writeFileSync(filePath, String(pid));
    fs.utimesSync(filePath, startedAt, startedAt);
  }
  /**
   * Write an executable bash script that re-runs the Chrome command.
   *
   * Every word (the binary path included) is quoted for the shell: words
   * made only of characters that are inert in bash are written as-is, and
   * everything else is wrapped in single quotes with embedded single quotes
   * spelled as '\''. Single quotes are used because `$`, backticks and
   * backslashes keep their special meaning inside double quotes.
   *
   * @param {string} filePath - Path to script file
   * @param {string} binary - Chrome binary path
   * @param {string[]} args - Chrome arguments
   */
  function writeCmdScript(filePath, binary, args) {
    const shellQuote = (word) => {
      const text = String(word);
      if (/^[A-Za-z0-9_@%+=:,./-]+$/.test(text)) {
        return text;
      }
      return `'${text.replace(/'/g, "'\\''")}'`;
    };
    const commandLine = [binary, ...args].map(shellQuote).join(' ');
    fs.writeFileSync(filePath, `#!/bin/bash\n${commandLine}\n`);
    fs.chmodSync(filePath, 0o755);
  }
  134. // ============================================================================
  135. // Port management
  136. // ============================================================================
  /**
   * Ask the OS for a currently unused TCP port.
   *
   * A throwaway server listens on port 0 so the OS assigns a free port; the
   * server is closed again before the port number is handed back.
   *
   * @returns {Promise<number>} - Available port number
   */
  function findFreePort() {
    return new Promise((resolve, reject) => {
      const probe = net.createServer();
      const onListening = () => {
        const { port } = probe.address();
        probe.close(() => resolve(port));
      };
      // unref() so the probe never keeps the process alive on its own.
      probe.unref();
      probe.on('error', reject);
      probe.listen(0, onListening);
    });
  }
  /**
   * Wait for Chrome's DevTools port to be ready.
   *
   * Polls http://127.0.0.1:<port>/json/version until the body parses as
   * JSON. A failed attempt (connection error, 1s request timeout, broken
   * response or unparsable body) schedules exactly one retry 100ms later.
   * Polling stops as soon as the promise settles.
   *
   * @param {number} port - Debug port number
   * @param {number} [timeout=30000] - Timeout in milliseconds
   * @returns {Promise<Object>} - Chrome version info (parsed /json/version body)
   */
  function waitForDebugPort(port, timeout = 30000) {
    const startTime = Date.now();
    return new Promise((resolve, reject) => {
      let settled = false;

      const tryConnect = () => {
        if (settled) return;
        if (Date.now() - startTime > timeout) {
          settled = true;
          reject(new Error(`Timeout waiting for Chrome debug port ${port}`));
          return;
        }

        // One attempt can fail through several events (destroying a timed-out
        // request also emits 'error'), so allow only one retry per attempt.
        let retryScheduled = false;
        const retry = () => {
          if (settled || retryScheduled) return;
          retryScheduled = true;
          setTimeout(tryConnect, 100);
        };

        const req = http.get(`http://127.0.0.1:${port}/json/version`, (res) => {
          let data = '';
          res.on('data', (chunk) => (data += chunk));
          res.on('error', retry);
          res.on('end', () => {
            let info;
            try {
              info = JSON.parse(data);
            } catch (e) {
              retry();
              return;
            }
            if (settled || retryScheduled) return;
            settled = true;
            resolve(info);
          });
        });
        req.on('error', retry);
        req.setTimeout(1000, () => {
          req.destroy();
          retry();
        });
      };

      tryConnect();
    });
  }
  189. // ============================================================================
  190. // Zombie process cleanup
  191. // ============================================================================
  /**
   * Kill zombie Chrome processes from stale crawls.
   *
   * Scans DATA_DIR/crawls/<crawl_id>/chrome/<name>.pid. A crawl directory
   * not modified within the last five minutes is considered stale: PID
   * files of dead processes are deleted, and processes that are still alive
   * are SIGKILLed (process group first, then the single PID).
   *
   * Afterwards, stale SingletonLock entries are removed from
   * DATA_DIR/personas/<name>/chrome_user_data. The lock is looked up with
   * lstat because `fs.existsSync` follows symlinks and reports a dangling
   * symlink as missing. NOTE(review): this relies on Chrome writing the
   * POSIX lock as a symlink to "<hostname>-<pid>" - confirm against
   * Chromium's process singleton code. A lock whose recorded owner is a
   * live process on this host is left alone.
   *
   * If the crawls directory does not exist the function returns 0 right
   * away, before any persona cleanup.
   *
   * @param {string} [dataDir] - Data directory (defaults to DATA_DIR env or '.')
   * @returns {number} - Number of zombies killed
   */
  function killZombieChrome(dataDir = null) {
    // Function-scope require, as this file does elsewhere for on-demand modules.
    const os = require('os');
    const rootDir = dataDir || getEnv('DATA_DIR', '.');
    const crawlsDir = path.join(rootDir, 'crawls');
    const staleBefore = Date.now() - 300000; // five minutes ago
    let killed = 0;

    // True when the lock is a symlink naming a live process on this host.
    const lockOwnerIsAlive = (lockPath) => {
      let target;
      try {
        target = fs.readlinkSync(lockPath);
      } catch (e) {
        return false; // not a symlink: nothing to inspect, treat as stale
      }
      const match = /^(.*)-(\d+)$/.exec(target);
      if (!match || match[1] !== os.hostname()) return false;
      const ownerPid = parseInt(match[2], 10);
      if (!(ownerPid > 0)) return false;
      try {
        process.kill(ownerPid, 0);
        return true;
      } catch (e) {
        // EPERM: the process exists but belongs to another user.
        return e.code === 'EPERM';
      }
    };

    console.error('[*] Checking for zombie Chrome processes...');
    if (!fs.existsSync(crawlsDir)) {
      console.error('[+] No crawls directory found');
      return 0;
    }

    try {
      const crawls = fs.readdirSync(crawlsDir, { withFileTypes: true });
      for (const crawl of crawls) {
        if (!crawl.isDirectory()) continue;
        const crawlDir = path.join(crawlsDir, crawl.name);
        const chromeDir = path.join(crawlDir, 'chrome');
        if (!fs.existsSync(chromeDir)) continue;

        // A recently modified crawl directory is treated as still active.
        try {
          if (fs.statSync(crawlDir).mtimeMs > staleBefore) continue;
        } catch (e) {
          continue;
        }

        // Crawl is stale, check for PIDs
        try {
          const pidFiles = fs.readdirSync(chromeDir).filter((f) => f.endsWith('.pid'));
          for (const pidFileName of pidFiles) {
            const pidFile = path.join(chromeDir, pidFileName);
            try {
              const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
              if (isNaN(pid) || pid <= 0) continue;

              // Signal 0 only probes for existence.
              try {
                process.kill(pid, 0);
              } catch (e) {
                // Process dead, remove stale PID file
                try { fs.unlinkSync(pidFile); } catch (unlinkErr) {}
                continue;
              }

              // Process alive and crawl is stale - zombie!
              console.error(`[!] Found zombie (PID ${pid}) from stale crawl ${crawl.name}`);
              try {
                // Prefer the whole process group; fall back to the single PID.
                try { process.kill(-pid, 'SIGKILL'); } catch (e) { process.kill(pid, 'SIGKILL'); }
                killed++;
                console.error(`[+] Killed zombie (PID ${pid})`);
                try { fs.unlinkSync(pidFile); } catch (unlinkErr) {}
              } catch (e) {
                console.error(`[!] Failed to kill PID ${pid}: ${e.message}`);
              }
            } catch (e) {
              // Skip invalid PID files
            }
          }
        } catch (e) {
          // Skip if can't read chrome dir
        }
      }
    } catch (e) {
      console.error(`[!] Error scanning crawls: ${e.message}`);
    }

    if (killed > 0) {
      console.error(`[+] Killed ${killed} zombie process(es)`);
    } else {
      console.error('[+] No zombies found');
    }

    // Clean up stale SingletonLock files from persona chrome_user_data directories
    const personasDir = path.join(rootDir, 'personas');
    if (fs.existsSync(personasDir)) {
      try {
        const personas = fs.readdirSync(personasDir, { withFileTypes: true });
        for (const persona of personas) {
          if (!persona.isDirectory()) continue;
          const singletonLock = path.join(personasDir, persona.name, 'chrome_user_data', 'SingletonLock');
          try {
            fs.lstatSync(singletonLock); // throws when there is no lock entry at all
          } catch (e) {
            continue;
          }
          if (lockOwnerIsAlive(singletonLock)) continue;
          try {
            fs.unlinkSync(singletonLock);
            console.error(`[+] Removed stale SingletonLock: ${singletonLock}`);
          } catch (e) {
            // Best effort: leave the lock in place if it cannot be removed
          }
        }
      } catch (e) {
        // Ignore errors scanning personas directory
      }
    }

    return killed;
  }
  291. // ============================================================================
  292. // Chrome launching
  293. // ============================================================================
  /**
   * Launch Chromium with extensions and return connection info.
   *
   * Steps: optionally kill zombie processes, prepare the output and user
   * data directories, pick a free debug port, assemble the argument list
   * (CHROME_ARGS + runtime flags + CHROME_ARGS_EXTRA + extension flags),
   * write `cmd.sh`, spawn the browser detached, record `chrome.pid`, wait
   * for the DevTools endpoint and write `cdp_url.txt` / `port.txt`.
   *
   * Failures after spawning (spawn error, DevTools timeout, write errors)
   * are reported as `{success: false, error}`. In that case the spawned
   * browser is killed and `chrome.pid` removed, so a failed launch does not
   * leave a process behind. Errors raised before spawning (for example
   * while creating directories) propagate as exceptions.
   *
   * @param {Object} options - Launch options
   * @param {string} [options.binary] - Chrome binary path (auto-detected if not provided)
   * @param {string} [options.outputDir='chrome'] - Directory for output files
   * @param {string} [options.userDataDir] - Chrome user data directory for persistent sessions
   * @param {string} [options.resolution='1440,2000'] - Window resolution
   * @param {boolean} [options.headless=true] - Run in headless mode
   * @param {boolean} [options.sandbox=true] - Enable Chrome sandbox
   * @param {boolean} [options.checkSsl=true] - Check SSL certificates
   * @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
   * @param {boolean} [options.killZombies=true] - Kill zombie processes first
   * @returns {Promise<Object>} - {success, cdpUrl, pid, port, process, error}
   */
  async function launchChromium(options = {}) {
    const {
      binary = findChromium(),
      outputDir = 'chrome',
      userDataDir = getEnv('CHROME_USER_DATA_DIR'),
      resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
      headless = getEnvBool('CHROME_HEADLESS', true),
      sandbox = getEnvBool('CHROME_SANDBOX', true),
      checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
      extensionPaths = [],
      killZombies = true,
    } = options;

    if (!binary) {
      return { success: false, error: 'Chrome binary not found' };
    }

    // Kill zombies first
    if (killZombies) {
      killZombieChrome();
    }

    const { width, height } = parseResolution(resolution);

    // Create output directory
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    // Create user data directory if specified and doesn't exist
    if (userDataDir) {
      if (!fs.existsSync(userDataDir)) {
        fs.mkdirSync(userDataDir, { recursive: true });
        console.error(`[*] Created user data directory: ${userDataDir}`);
      }
      // Clean up any stale SingletonLock from previous crashed sessions.
      // Unlink directly instead of probing with existsSync: existsSync
      // follows symlinks, so a dangling lock symlink would look absent.
      const singletonLock = path.join(userDataDir, 'SingletonLock');
      try {
        fs.unlinkSync(singletonLock);
        console.error(`[*] Removed stale SingletonLock: ${singletonLock}`);
      } catch (e) {
        if (e.code !== 'ENOENT') {
          console.error(`[!] Failed to remove SingletonLock: ${e.message}`);
        }
      }
    }

    // Find a free port
    const debugPort = await findFreePort();
    console.error(`[*] Using debug port: ${debugPort}`);

    // Static flags from the CHROME_ARGS env var
    const baseArgs = getEnvArray('CHROME_ARGS', []);
    // Extra user-provided flags from CHROME_ARGS_EXTRA
    const extraArgs = getEnvArray('CHROME_ARGS_EXTRA', []);

    // Build dynamic Chrome arguments (these must be computed at runtime)
    const dynamicArgs = [
      // Remote debugging setup
      `--remote-debugging-port=${debugPort}`,
      '--remote-debugging-address=127.0.0.1',
      // Sandbox settings (disable in Docker)
      ...(sandbox ? [] : ['--no-sandbox', '--disable-setuid-sandbox']),
      // Docker-specific workarounds
      '--disable-dev-shm-usage',
      '--disable-gpu',
      // Window size
      `--window-size=${width},${height}`,
      // User data directory (for persistent sessions with persona)
      ...(userDataDir ? [`--user-data-dir=${userDataDir}`] : []),
      // Headless mode
      ...(headless ? ['--headless=new'] : []),
      // SSL certificate checking
      ...(checkSsl ? [] : ['--ignore-certificate-errors']),
    ];

    // Combine all args: base (from config) + dynamic (runtime) + extra (user overrides)
    // Dynamic args come after base so they can override if needed
    const chromiumArgs = [...baseArgs, ...dynamicArgs, ...extraArgs];

    // Add extension loading flags
    if (extensionPaths.length > 0) {
      const extPathsArg = extensionPaths.join(',');
      chromiumArgs.push(`--load-extension=${extPathsArg}`);
      chromiumArgs.push('--enable-unsafe-extension-debugging');
      chromiumArgs.push('--disable-features=DisableLoadExtensionCommandLineSwitch,ExtensionManifestV2Unsupported,ExtensionManifestV2Disabled');
      console.error(`[*] Loading ${extensionPaths.length} extension(s) via --load-extension`);
    }
    chromiumArgs.push('about:blank');

    // Write command script for debugging
    writeCmdScript(path.join(outputDir, 'cmd.sh'), binary, chromiumArgs);

    const pidFilePath = path.join(outputDir, 'chrome.pid');
    let chromiumProcess = null;
    try {
      console.error(`[*] Spawning Chromium (headless=${headless})...`);
      chromiumProcess = spawn(binary, chromiumArgs, {
        stdio: ['ignore', 'pipe', 'pipe'],
        detached: true,
      });

      // A failed spawn (e.g. missing binary) is reported through an 'error'
      // event; without a listener it would surface as an uncaught exception.
      const spawnFailed = new Promise((_, reject) => {
        chromiumProcess.once('error', reject);
      });
      // Keep a rejection that arrives outside the race below from being unhandled.
      spawnFailed.catch(() => {});

      const chromePid = chromiumProcess.pid;
      const chromeStartTime = Date.now() / 1000;
      if (chromePid) {
        console.error(`[*] Chromium spawned (PID: ${chromePid})`);
        writePidWithMtime(pidFilePath, chromePid, chromeStartTime);
      }

      // Pipe Chrome output to stderr
      chromiumProcess.stdout.on('data', (data) => {
        process.stderr.write(`[chromium:stdout] ${data}`);
      });
      chromiumProcess.stderr.on('data', (data) => {
        process.stderr.write(`[chromium:stderr] ${data}`);
      });

      // Wait for debug port, giving up early if the spawn itself failed
      console.error(`[*] Waiting for debug port ${debugPort}...`);
      const versionInfo = await Promise.race([
        waitForDebugPort(debugPort, 30000),
        spawnFailed,
      ]);
      const wsUrl = versionInfo.webSocketDebuggerUrl;
      console.error(`[+] Chromium ready: ${wsUrl}`);

      fs.writeFileSync(path.join(outputDir, 'cdp_url.txt'), wsUrl);
      fs.writeFileSync(path.join(outputDir, 'port.txt'), String(debugPort));

      return {
        success: true,
        cdpUrl: wsUrl,
        pid: chromePid,
        port: debugPort,
        process: chromiumProcess,
      };
    } catch (e) {
      // The launch is reported as failed, so do not leave the browser running.
      if (chromiumProcess && chromiumProcess.pid) {
        try {
          // Spawned detached: a negative PID targets the whole process group.
          process.kill(-chromiumProcess.pid, 'SIGKILL');
        } catch (groupErr) {
          try { chromiumProcess.kill('SIGKILL'); } catch (killErr) {}
        }
        try { fs.unlinkSync(pidFilePath); } catch (unlinkErr) {}
      }
      return { success: false, error: `${e.name}: ${e.message}` };
    }
  }
  /**
   * Check if a process is still running.
   *
   * Sends signal 0, which performs the existence and permission checks
   * without delivering a signal. EPERM means the process exists but belongs
   * to another user, so it counts as alive. PIDs that are not positive
   * integers are rejected up front because 0 and negative values address
   * process groups, not a single process.
   *
   * @param {number} pid - Process ID to check
   * @returns {boolean} - True if process exists
   */
  function isProcessAlive(pid) {
    const target = Number(pid);
    if (!Number.isInteger(target) || target <= 0) {
      return false;
    }
    try {
      process.kill(target, 0);
      return true;
    } catch (e) {
      return e.code === 'EPERM';
    }
  }
  /**
   * Find Chrome/Chromium processes started with a given debug port.
   *
   * Runs `ps aux` through grep/awk and collects the PID column of every
   * line whose command mentions "chrome" or "chromium" followed by
   * `--remote-debugging-port=<port>`. The port must be followed by
   * whitespace or the end of the line, so 9222 does not match 92221.
   *
   * The port is validated as an integer in 1-65535 before it is placed in
   * the shell command; anything else returns an empty list without running
   * a command. Command failures also yield an empty list.
   *
   * @param {number} port - Debug port number
   * @returns {Array<number>} - Array of PIDs
   */
  function findChromeProcessesByPort(port) {
    const { execSync } = require('child_process');
    const portNumber = Number(port);
    if (!Number.isInteger(portNumber) || portNumber < 1 || portNumber > 65535) {
      return [];
    }

    const pids = [];
    try {
      const output = execSync(
        `ps aux | grep -iE 'chrom(e|ium).*--remote-debugging-port=${portNumber}([[:space:]]|$)' | grep -v grep | awk '{print $2}'`,
        { encoding: 'utf8', timeout: 5000 }
      );
      for (const line of output.split('\n')) {
        const pid = parseInt(line.trim(), 10);
        if (!isNaN(pid) && pid > 0) {
          pids.push(pid);
        }
      }
    } catch (e) {
      // Command failed or no processes found
    }
    return pids;
  }
  /**
   * Kill a Chrome process by PID.
   * Always sends SIGTERM before SIGKILL, then verifies death.
   *
   * Sequence: SIGTERM to the process group (falling back to the single
   * PID), wait 2s, and if the process is still alive SIGKILL the group,
   * wait 1s and verify. If it still survives and `port.txt` in `outputDir`
   * gives a debug port, other Chrome processes found for that port are
   * SIGKILLed as process groups too. Finally `chrome.pid` and `hook.pid`
   * in `outputDir` are removed.
   *
   * A falsy PID returns silently. A PID that is not an integer greater
   * than 1 is refused: signals are sent to `-pid`, and `kill(-1)` would
   * signal every process the caller may signal.
   *
   * @param {number} pid - Process ID to kill
   * @param {string} [outputDir] - Directory containing PID files to clean up
   */
  async function killChrome(pid, outputDir = null) {
    if (!pid) return;
    const targetPid = Number(pid);
    if (!Number.isInteger(targetPid) || targetPid <= 1) {
      console.error(`[!] Refusing to kill invalid Chrome PID: ${pid}`);
      return;
    }
    console.error(`[*] Killing Chrome process tree (PID ${targetPid})...`);

    // Get debug port for finding child processes
    let debugPort = null;
    if (outputDir) {
      try {
        const portFile = path.join(outputDir, 'port.txt');
        if (fs.existsSync(portFile)) {
          debugPort = parseInt(fs.readFileSync(portFile, 'utf8').trim(), 10);
        }
      } catch (e) {}
    }

    // Step 1: SIGTERM to process group (graceful shutdown)
    console.error(`[*] Sending SIGTERM to process group -${targetPid}...`);
    try {
      process.kill(-targetPid, 'SIGTERM');
    } catch (e) {
      try {
        console.error(`[*] Process group kill failed, trying single process...`);
        process.kill(targetPid, 'SIGTERM');
      } catch (e2) {
        console.error(`[!] SIGTERM failed: ${e2.message}`);
      }
    }

    // Step 2: Wait for graceful shutdown
    await new Promise((resolve) => setTimeout(resolve, 2000));

    // Step 3: Check if still alive
    if (!isProcessAlive(targetPid)) {
      console.error('[+] Chrome process terminated gracefully');
    } else {
      // Step 4: Force kill ENTIRE process group with SIGKILL
      console.error(`[*] Process still alive, sending SIGKILL to process group -${targetPid}...`);
      try {
        process.kill(-targetPid, 'SIGKILL'); // Kill entire process group
      } catch (e) {
        console.error(`[!] Process group SIGKILL failed, trying single process: ${e.message}`);
        try {
          process.kill(targetPid, 'SIGKILL');
        } catch (e2) {
          console.error(`[!] SIGKILL failed: ${e2.message}`);
        }
      }

      // Step 5: Wait briefly and verify death
      await new Promise((resolve) => setTimeout(resolve, 1000));
      if (isProcessAlive(targetPid)) {
        console.error(`[!] WARNING: Process ${targetPid} is unkillable (likely in UNE state)`);
        console.error(`[!] This typically happens when Chrome crashes in kernel syscall`);
        console.error(`[!] Process will remain as zombie until system reboot`);
        console.error(`[!] macOS IOSurface crash creates unkillable processes in UNE state`);

        // Step 6: Last resort - SIGKILL other Chrome processes on the same debug port
        if (debugPort) {
          const relatedPids = findChromeProcessesByPort(debugPort);
          if (relatedPids.length > 1) {
            console.error(`[*] Found ${relatedPids.length} Chrome processes still running on port ${debugPort}`);
            console.error(`[*] Attempting final process group SIGKILL...`);
            // Each related PID (other than the main one) is tried as a process group id
            const processGroups = new Set();
            for (const relatedPid of relatedPids) {
              if (relatedPid !== targetPid) {
                processGroups.add(relatedPid);
              }
            }
            for (const groupPid of processGroups) {
              try {
                process.kill(-groupPid, 'SIGKILL');
              } catch (e) {}
            }
          }
        }
      } else {
        console.error('[+] Chrome process group killed successfully');
      }
    }

    // Step 7: Clean up PID files
    if (outputDir) {
      try { fs.unlinkSync(path.join(outputDir, 'chrome.pid')); } catch (e) {}
      try { fs.unlinkSync(path.join(outputDir, 'hook.pid')); } catch (e) {}
    }
    console.error('[*] Chrome cleanup completed');
  }
  /**
   * Install Chromium using the @puppeteer/browsers programmatic API.
   *
   * If CHROME_BINARY points at an existing file it is returned as-is and
   * nothing is installed. Otherwise @puppeteer/browsers is loaded
   * (NODE_MODULES_DIR is added to the module search path once), the
   * "latest" tag is resolved to a concrete build id, and that build is
   * installed into the cache directory.
   *
   * NOTE(review): this relies on `install()` requiring a concrete `buildId`
   * and a `cacheDir`, and on `resolveBuildId` / `detectBrowserPlatform`
   * being exported - confirm against the installed package version.
   *
   * @param {Object} options - Install options
   * @param {string} [options.cacheDir] - Download cache directory (default:
   *   PUPPETEER_CACHE_DIR env, else ~/.cache/puppeteer)
   * @returns {Promise<Object>} - {success, binary, version, error}
   */
  async function installChromium(options = {}) {
    // Check if CHROME_BINARY is already set and valid
    const configuredBinary = getEnv('CHROME_BINARY');
    if (configuredBinary && fs.existsSync(configuredBinary)) {
      console.error(`[+] Using configured CHROME_BINARY: ${configuredBinary}`);
      return { success: true, binary: configuredBinary, version: null };
    }

    // Try to load @puppeteer/browsers from NODE_MODULES_DIR or system
    let puppeteerBrowsers;
    try {
      const extraModulesDir = process.env.NODE_MODULES_DIR;
      // Add the directory once; repeated calls must not keep growing module.paths.
      if (extraModulesDir && !module.paths.includes(extraModulesDir)) {
        module.paths.unshift(extraModulesDir);
      }
      puppeteerBrowsers = require('@puppeteer/browsers');
    } catch (e) {
      console.error(`[!] @puppeteer/browsers not found. Install it first with installPuppeteerCore.`);
      return { success: false, error: '@puppeteer/browsers not installed' };
    }

    console.error(`[*] Installing Chromium via @puppeteer/browsers...`);
    try {
      const os = require('os');
      const cacheDir =
        options.cacheDir ||
        getEnv('PUPPETEER_CACHE_DIR') ||
        path.join(os.homedir(), '.cache', 'puppeteer');

      const platform = puppeteerBrowsers.detectBrowserPlatform();
      if (!platform) {
        throw new Error(`Unsupported platform: ${process.platform}-${process.arch}`);
      }

      // "latest" is a tag, not a build id, so resolve it before installing.
      const buildId = await puppeteerBrowsers.resolveBuildId('chromium', platform, 'latest');
      const result = await puppeteerBrowsers.install({
        browser: 'chromium',
        buildId,
        cacheDir,
        platform,
      });

      const binary = result.executablePath;
      const version = result.buildId;
      if (!binary || !fs.existsSync(binary)) {
        console.error(`[!] Chromium binary not found at: ${binary}`);
        return { success: false, error: `Chromium binary not found at: ${binary}` };
      }
      console.error(`[+] Chromium installed: ${binary}`);
      return { success: true, binary, version };
    } catch (e) {
      console.error(`[!] Failed to install Chromium: ${e.message}`);
      return { success: false, error: e.message };
    }
  }
  /**
   * Install the puppeteer-core npm package.
   *
   * Returns immediately when <npmPrefix>/node_modules/puppeteer-core already
   * exists. Otherwise runs `npm install --prefix <npmPrefix> puppeteer-core`.
   * npm is called with an argument array, so the prefix path is never parsed
   * by a shell on POSIX systems (Windows needs a shell to run npm's .cmd shim).
   *
   * @param {Object} options - Install options
   * @param {string} [options.npmPrefix] - npm prefix directory (default:
   *   <LIB_DIR>/npm, where LIB_DIR falls back to DATA_DIR and then '.')
   * @param {number} [options.timeout=60000] - Timeout in milliseconds
   * @returns {Promise<Object>} - {success, path, error}
   */
  async function installPuppeteerCore(options = {}) {
    const defaultPrefix = path.join(getEnv('LIB_DIR', getEnv('DATA_DIR', '.')), 'npm');
    const {
      npmPrefix = defaultPrefix,
      timeout = 60000,
    } = options;
    const nodeModulesDir = path.join(npmPrefix, 'node_modules');
    const puppeteerPath = path.join(nodeModulesDir, 'puppeteer-core');

    // Check if already installed
    if (fs.existsSync(puppeteerPath)) {
      console.error(`[+] puppeteer-core already installed: ${puppeteerPath}`);
      return { success: true, path: puppeteerPath };
    }

    console.error(`[*] Installing puppeteer-core to ${npmPrefix}...`);

    // Create directory
    if (!fs.existsSync(npmPrefix)) {
      fs.mkdirSync(npmPrefix, { recursive: true });
    }

    try {
      const { execFileSync } = require('child_process');
      execFileSync(
        'npm',
        ['install', '--prefix', npmPrefix, 'puppeteer-core'],
        {
          encoding: 'utf8',
          timeout,
          stdio: ['pipe', 'pipe', 'pipe'],
          shell: process.platform === 'win32',
        }
      );
      console.error(`[+] puppeteer-core installed successfully`);
      return { success: true, path: puppeteerPath };
    } catch (e) {
      console.error(`[!] Failed to install puppeteer-core: ${e.message}`);
      return { success: false, error: e.message };
    }
  }
  // Optional `unzipper` dependency: when it can be loaded, `unzip` extracts
  // an archive by streaming it into `unzipper.Extract`; when it cannot,
  // `unzip` stays null.
  let unzip = null;
  try {
    const unzipper = require('unzipper');
    unzip = async (sourcePath, destPath) => {
      const archiveStream = fs.createReadStream(sourcePath);
      const extractor = unzipper.Extract({ path: destPath });
      return archiveStream.pipe(extractor).promise();
    };
  } catch (err) {
    // `unzipper` is not installed: `unzip` remains null
  }
  649. /**
  650. * Compute the extension ID from the unpacked path.
  651. * Chrome uses a SHA256 hash of the unpacked extension directory path to compute a dynamic id.
  652. *
  653. * @param {string} unpacked_path - Path to the unpacked extension directory
  654. * @returns {string} - 32-character extension ID
  655. */
  656. function getExtensionId(unpacked_path) {
  657. // Chrome uses a SHA256 hash of the unpacked extension directory path
  658. const hash = crypto.createHash('sha256');
  659. hash.update(Buffer.from(unpacked_path, 'utf-8'));
  660. // Convert first 32 hex chars to characters in the range 'a'-'p'
  661. const detected_extension_id = Array.from(hash.digest('hex'))
  662. .slice(0, 32)
  663. .map(i => String.fromCharCode(parseInt(i, 16) + 'a'.charCodeAt(0)))
  664. .join('');
  665. return detected_extension_id;
  666. }
  667. /**
  668. * Download and install a Chrome extension from the Chrome Web Store.
  669. *
  670. * @param {Object} extension - Extension metadata object
  671. * @param {string} extension.webstore_id - Chrome Web Store extension ID
  672. * @param {string} extension.name - Human-readable extension name
  673. * @param {string} extension.crx_url - URL to download the CRX file
  674. * @param {string} extension.crx_path - Local path to save the CRX file
  675. * @param {string} extension.unpacked_path - Path to extract the extension
  676. * @returns {Promise<boolean>} - True if installation succeeded
  677. */
  678. async function installExtension(extension) {
  679. const manifest_path = path.join(extension.unpacked_path, 'manifest.json');
  680. // Download CRX file if not already downloaded
  681. if (!fs.existsSync(manifest_path) && !fs.existsSync(extension.crx_path)) {
  682. console.log(`[🛠️] Downloading missing extension ${extension.name} ${extension.webstore_id} -> ${extension.crx_path}`);
  683. try {
  684. // Ensure parent directory exists
  685. const crxDir = path.dirname(extension.crx_path);
  686. if (!fs.existsSync(crxDir)) {
  687. fs.mkdirSync(crxDir, { recursive: true });
  688. }
  689. // Download CRX file from Chrome Web Store
  690. const response = await fetch(extension.crx_url);
  691. if (!response.ok) {
  692. console.warn(`[⚠️] Failed to download extension ${extension.name}: HTTP ${response.status}`);
  693. return false;
  694. }
  695. if (response.body) {
  696. const crx_file = fs.createWriteStream(extension.crx_path);
  697. const crx_stream = Readable.fromWeb(response.body);
  698. await finished(crx_stream.pipe(crx_file));
  699. } else {
  700. console.warn(`[⚠️] Failed to download extension ${extension.name}: No response body`);
  701. return false;
  702. }
  703. } catch (err) {
  704. console.error(`[❌] Failed to download extension ${extension.name}:`, err);
  705. return false;
  706. }
  707. }
  708. // Unzip CRX file to unpacked_path (CRX files have extra header bytes but unzip handles it)
  709. await fs.promises.mkdir(extension.unpacked_path, { recursive: true });
  710. try {
  711. // Use -q to suppress warnings about extra bytes in CRX header
  712. await execAsync(`/usr/bin/unzip -q -o "${extension.crx_path}" -d "${extension.unpacked_path}"`);
  713. } catch (err1) {
  714. // unzip may return non-zero even on success due to CRX header warning, check if manifest exists
  715. if (!fs.existsSync(manifest_path)) {
  716. if (unzip) {
  717. // Fallback to unzipper library
  718. try {
  719. await unzip(extension.crx_path, extension.unpacked_path);
  720. } catch (err2) {
  721. console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err2.message);
  722. return false;
  723. }
  724. } else {
  725. console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err1.message);
  726. return false;
  727. }
  728. }
  729. }
  730. if (!fs.existsSync(manifest_path)) {
  731. console.error(`[❌] Failed to install ${extension.crx_path}: could not find manifest.json in unpacked_path`);
  732. return false;
  733. }
  734. return true;
  735. }
  736. /**
  737. * Load or install a Chrome extension, computing all metadata.
  738. *
  739. * @param {Object} ext - Partial extension metadata (at minimum: webstore_id or unpacked_path)
  740. * @param {string} [ext.webstore_id] - Chrome Web Store extension ID
  741. * @param {string} [ext.name] - Human-readable extension name
  742. * @param {string} [ext.unpacked_path] - Path to unpacked extension
  743. * @param {string} [extensions_dir] - Directory to store extensions
  744. * @returns {Promise<Object>} - Complete extension metadata object
  745. */
  746. async function loadOrInstallExtension(ext, extensions_dir = null) {
  747. if (!(ext.webstore_id || ext.unpacked_path)) {
  748. throw new Error('Extension must have either {webstore_id} or {unpacked_path}');
  749. }
  750. // Determine extensions directory
  751. const EXTENSIONS_DIR = extensions_dir || process.env.CHROME_EXTENSIONS_DIR || './data/chrome_extensions';
  752. // Set statically computable extension metadata
  753. ext.webstore_id = ext.webstore_id || ext.id;
  754. ext.name = ext.name || ext.webstore_id;
  755. ext.webstore_url = ext.webstore_url || `https://chromewebstore.google.com/detail/${ext.webstore_id}`;
  756. ext.crx_url = ext.crx_url || `https://clients2.google.com/service/update2/crx?response=redirect&prodversion=1230&acceptformat=crx3&x=id%3D${ext.webstore_id}%26uc`;
  757. ext.crx_path = ext.crx_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}.crx`);
  758. ext.unpacked_path = ext.unpacked_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}`);
  759. const manifest_path = path.join(ext.unpacked_path, 'manifest.json');
  760. ext.read_manifest = () => JSON.parse(fs.readFileSync(manifest_path, 'utf-8'));
  761. ext.read_version = () => fs.existsSync(manifest_path) && ext.read_manifest()?.version || null;
  762. // If extension is not installed, download and unpack it
  763. if (!ext.read_version()) {
  764. await installExtension(ext);
  765. }
  766. // Autodetect ID from filesystem path (unpacked extensions don't have stable IDs)
  767. ext.id = getExtensionId(ext.unpacked_path);
  768. ext.version = ext.read_version();
  769. if (!ext.version) {
  770. console.warn(`[❌] Unable to detect ID and version of installed extension ${ext.unpacked_path}`);
  771. } else {
  772. console.log(`[➕] Installed extension ${ext.name} (${ext.version})... ${ext.unpacked_path}`);
  773. }
  774. return ext;
  775. }
  776. /**
  777. * Check if a Puppeteer target is an extension background page/service worker.
  778. *
  779. * @param {Object} target - Puppeteer target object
  780. * @returns {Promise<Object>} - Object with target_is_bg, extension_id, manifest_version, etc.
  781. */
  782. async function isTargetExtension(target) {
  783. let target_type;
  784. let target_ctx;
  785. let target_url;
  786. try {
  787. target_type = target.type();
  788. target_ctx = (await target.worker()) || (await target.page()) || null;
  789. target_url = target.url() || target_ctx?.url() || null;
  790. } catch (err) {
  791. if (String(err).includes('No target with given id found')) {
  792. // Target closed during check, ignore harmless race condition
  793. target_type = 'closed';
  794. target_ctx = null;
  795. target_url = 'about:closed';
  796. } else {
  797. throw err;
  798. }
  799. }
  800. // Check if this is an extension background page or service worker
  801. const is_chrome_extension = target_url?.startsWith('chrome-extension://');
  802. const is_background_page = target_type === 'background_page';
  803. const is_service_worker = target_type === 'service_worker';
  804. const target_is_bg = is_chrome_extension && (is_background_page || is_service_worker);
  805. let extension_id = null;
  806. let manifest_version = null;
  807. const target_is_extension = is_chrome_extension || target_is_bg;
  808. if (target_is_extension) {
  809. try {
  810. extension_id = target_url?.split('://')[1]?.split('/')[0] || null;
  811. if (target_ctx) {
  812. const manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
  813. manifest_version = manifest?.manifest_version || null;
  814. }
  815. } catch (err) {
  816. // Failed to get extension metadata
  817. }
  818. }
  819. return {
  820. target_is_extension,
  821. target_is_bg,
  822. target_type,
  823. target_ctx,
  824. target_url,
  825. extension_id,
  826. manifest_version,
  827. };
  828. }
  829. /**
  830. * Load extension metadata and connection handlers from a browser target.
  831. *
  832. * @param {Array} extensions - Array of extension metadata objects to update
  833. * @param {Object} target - Puppeteer target object
  834. * @returns {Promise<Object|null>} - Updated extension object or null if not an extension
  835. */
  836. async function loadExtensionFromTarget(extensions, target) {
  837. const {
  838. target_is_bg,
  839. target_is_extension,
  840. target_type,
  841. target_ctx,
  842. target_url,
  843. extension_id,
  844. manifest_version,
  845. } = await isTargetExtension(target);
  846. if (!(target_is_bg && extension_id && target_ctx)) {
  847. return null;
  848. }
  849. // Find matching extension in our list
  850. const extension = extensions.find(ext => ext.id === extension_id);
  851. if (!extension) {
  852. console.warn(`[⚠️] Found loaded extension ${extension_id} that's not in CHROME_EXTENSIONS list`);
  853. return null;
  854. }
  855. // Load manifest from the extension context
  856. let manifest = null;
  857. try {
  858. manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
  859. } catch (err) {
  860. console.error(`[❌] Failed to read manifest for extension ${extension_id}:`, err);
  861. return null;
  862. }
  863. // Create dispatch methods for communicating with the extension
  864. const new_extension = {
  865. ...extension,
  866. target,
  867. target_type,
  868. target_url,
  869. manifest,
  870. manifest_version,
  871. // Trigger extension toolbar button click
  872. dispatchAction: async (tab) => {
  873. return await target_ctx.evaluate((tabId) => {
  874. return new Promise((resolve) => {
  875. chrome.action.onClicked.addListener((tab) => {
  876. resolve({ success: true, tab });
  877. });
  878. chrome.action.openPopup();
  879. });
  880. }, tab?.id || null);
  881. },
  882. // Send message to extension
  883. dispatchMessage: async (message, options = {}) => {
  884. return await target_ctx.evaluate((msg, opts) => {
  885. return new Promise((resolve) => {
  886. chrome.runtime.sendMessage(msg, opts, (response) => {
  887. resolve(response);
  888. });
  889. });
  890. }, message, options);
  891. },
  892. // Trigger extension command (keyboard shortcut)
  893. dispatchCommand: async (command) => {
  894. return await target_ctx.evaluate((cmd) => {
  895. return new Promise((resolve) => {
  896. chrome.commands.onCommand.addListener((receivedCommand) => {
  897. if (receivedCommand === cmd) {
  898. resolve({ success: true, command: receivedCommand });
  899. }
  900. });
  901. // Note: Actually triggering commands programmatically is not directly supported
  902. // This would need to be done via CDP or keyboard simulation
  903. });
  904. }, command);
  905. },
  906. };
  907. // Update the extension in the array
  908. Object.assign(extension, new_extension);
  909. console.log(`[🔌] Connected to extension ${extension.name} (${extension.version})`);
  910. return new_extension;
  911. }
  912. /**
  913. * Install all extensions in the list if not already installed.
  914. *
  915. * @param {Array} extensions - Array of extension metadata objects
  916. * @param {string} [extensions_dir] - Directory to store extensions
  917. * @returns {Promise<Array>} - Array of installed extension objects
  918. */
  919. async function installAllExtensions(extensions, extensions_dir = null) {
  920. console.log(`[⚙️] Installing ${extensions.length} chrome extensions...`);
  921. for (const extension of extensions) {
  922. await loadOrInstallExtension(extension, extensions_dir);
  923. }
  924. return extensions;
  925. }
  926. /**
  927. * Load and connect to all extensions from a running browser.
  928. *
  929. * @param {Object} browser - Puppeteer browser instance
  930. * @param {Array} extensions - Array of extension metadata objects
  931. * @returns {Promise<Array>} - Array of loaded extension objects with connection handlers
  932. */
  933. async function loadAllExtensionsFromBrowser(browser, extensions) {
  934. console.log(`[⚙️] Loading ${extensions.length} chrome extensions from browser...`);
  935. // Find loaded extensions at runtime by examining browser targets
  936. for (const target of browser.targets()) {
  937. await loadExtensionFromTarget(extensions, target);
  938. }
  939. return extensions;
  940. }
  941. /**
  942. * Load extension manifest.json file
  943. *
  944. * @param {string} unpacked_path - Path to unpacked extension directory
  945. * @returns {object|null} - Parsed manifest object or null if not found/invalid
  946. */
  947. function loadExtensionManifest(unpacked_path) {
  948. const manifest_path = path.join(unpacked_path, 'manifest.json');
  949. if (!fs.existsSync(manifest_path)) {
  950. return null;
  951. }
  952. try {
  953. const manifest_content = fs.readFileSync(manifest_path, 'utf-8');
  954. return JSON.parse(manifest_content);
  955. } catch (error) {
  956. // Invalid JSON or read error
  957. return null;
  958. }
  959. }
  960. /**
  961. * @deprecated Use puppeteer's enableExtensions option instead.
  962. *
  963. * Generate Chrome launch arguments for loading extensions.
  964. * NOTE: This is deprecated. Use puppeteer.launch({ pipe: true, enableExtensions: [paths] }) instead.
  965. *
  966. * @param {Array} extensions - Array of extension metadata objects
  967. * @returns {Array<string>} - Chrome CLI arguments for loading extensions
  968. */
  969. function getExtensionLaunchArgs(extensions) {
  970. console.warn('[DEPRECATED] getExtensionLaunchArgs is deprecated. Use puppeteer enableExtensions option instead.');
  971. if (!extensions || extensions.length === 0) {
  972. return [];
  973. }
  974. // Filter out extensions without unpacked_path first
  975. const validExtensions = extensions.filter(ext => ext.unpacked_path);
  976. const unpacked_paths = validExtensions.map(ext => ext.unpacked_path);
  977. // Use computed id (from path hash) for allowlisting, as that's what Chrome uses for unpacked extensions
  978. // Fall back to webstore_id if computed id not available
  979. const extension_ids = validExtensions.map(ext => ext.id || getExtensionId(ext.unpacked_path));
  980. return [
  981. `--load-extension=${unpacked_paths.join(',')}`,
  982. `--allowlisted-extension-id=${extension_ids.join(',')}`,
  983. '--allow-legacy-extension-manifests',
  984. '--disable-extensions-auto-update',
  985. ];
  986. }
  987. /**
  988. * Get extension paths for use with puppeteer's enableExtensions option.
  989. * Following puppeteer best practices: https://pptr.dev/guides/chrome-extensions
  990. *
  991. * @param {Array} extensions - Array of extension metadata objects
  992. * @returns {Array<string>} - Array of extension unpacked paths
  993. */
  994. function getExtensionPaths(extensions) {
  995. if (!extensions || extensions.length === 0) {
  996. return [];
  997. }
  998. return extensions
  999. .filter(ext => ext.unpacked_path)
  1000. .map(ext => ext.unpacked_path);
  1001. }
  1002. /**
  1003. * Wait for an extension target to be available in the browser.
  1004. * Following puppeteer best practices for accessing extension contexts.
  1005. *
  1006. * For Manifest V3 extensions (service workers):
  1007. * const worker = await waitForExtensionTarget(browser, extensionId);
  1008. * // worker is a WebWorker context
  1009. *
  1010. * For Manifest V2 extensions (background pages):
  1011. * const page = await waitForExtensionTarget(browser, extensionId);
  1012. * // page is a Page context
  1013. *
  1014. * @param {Object} browser - Puppeteer browser instance
  1015. * @param {string} extensionId - Extension ID to wait for (computed from path hash)
  1016. * @param {number} [timeout=30000] - Timeout in milliseconds
  1017. * @returns {Promise<Object>} - Worker or Page context for the extension
  1018. */
  1019. async function waitForExtensionTarget(browser, extensionId, timeout = 30000) {
  1020. // Try to find service worker first (Manifest V3)
  1021. try {
  1022. const workerTarget = await browser.waitForTarget(
  1023. target => target.type() === 'service_worker' &&
  1024. target.url().includes(`chrome-extension://${extensionId}`),
  1025. { timeout }
  1026. );
  1027. const worker = await workerTarget.worker();
  1028. if (worker) return worker;
  1029. } catch (err) {
  1030. // No service worker found, try background page
  1031. }
  1032. // Try background page (Manifest V2)
  1033. try {
  1034. const backgroundTarget = await browser.waitForTarget(
  1035. target => target.type() === 'background_page' &&
  1036. target.url().includes(`chrome-extension://${extensionId}`),
  1037. { timeout }
  1038. );
  1039. const page = await backgroundTarget.page();
  1040. if (page) return page;
  1041. } catch (err) {
  1042. // No background page found
  1043. }
  1044. // Try any extension page as fallback
  1045. const extTarget = await browser.waitForTarget(
  1046. target => target.url().startsWith(`chrome-extension://${extensionId}`),
  1047. { timeout }
  1048. );
  1049. // Return worker or page depending on target type
  1050. if (extTarget.type() === 'service_worker') {
  1051. return await extTarget.worker();
  1052. }
  1053. return await extTarget.page();
  1054. }
  1055. /**
  1056. * Get all loaded extension targets from a browser.
  1057. *
  1058. * @param {Object} browser - Puppeteer browser instance
  1059. * @returns {Array<Object>} - Array of extension target info objects
  1060. */
  1061. function getExtensionTargets(browser) {
  1062. return browser.targets()
  1063. .filter(target =>
  1064. target.url().startsWith('chrome-extension://') ||
  1065. target.type() === 'service_worker' ||
  1066. target.type() === 'background_page'
  1067. )
  1068. .map(target => ({
  1069. type: target.type(),
  1070. url: target.url(),
  1071. extensionId: target.url().includes('chrome-extension://')
  1072. ? target.url().split('chrome-extension://')[1]?.split('/')[0]
  1073. : null,
  1074. }));
  1075. }
  1076. /**
  1077. * Find Chromium/Chrome binary path.
  1078. * Checks CHROME_BINARY env var first, then falls back to system locations.
  1079. *
  1080. * @returns {string|null} - Absolute path to browser binary or null if not found
  1081. */
  1082. function findChromium() {
  1083. const { execSync } = require('child_process');
  1084. // Helper to validate a binary by running --version
  1085. const validateBinary = (binaryPath) => {
  1086. if (!binaryPath || !fs.existsSync(binaryPath)) return false;
  1087. try {
  1088. execSync(`"${binaryPath}" --version`, { encoding: 'utf8', timeout: 5000, stdio: 'pipe' });
  1089. return true;
  1090. } catch (e) {
  1091. return false;
  1092. }
  1093. };
  1094. // 1. Check CHROME_BINARY env var first
  1095. const chromeBinary = getEnv('CHROME_BINARY');
  1096. if (chromeBinary) {
  1097. const absPath = path.resolve(chromeBinary);
  1098. if (validateBinary(absPath)) {
  1099. return absPath;
  1100. }
  1101. console.error(`[!] Warning: CHROME_BINARY="${chromeBinary}" is not valid`);
  1102. }
  1103. // 2. Warn that no CHROME_BINARY is configured, searching fallbacks
  1104. if (!chromeBinary) {
  1105. console.error('[!] Warning: CHROME_BINARY not set, searching system locations...');
  1106. }
  1107. // Helper to find Chromium in @puppeteer/browsers directory structure
  1108. const findInPuppeteerDir = (baseDir) => {
  1109. if (!fs.existsSync(baseDir)) return null;
  1110. try {
  1111. const versions = fs.readdirSync(baseDir);
  1112. for (const version of versions.sort().reverse()) {
  1113. const versionDir = path.join(baseDir, version);
  1114. const candidates = [
  1115. path.join(versionDir, 'chrome-mac-arm64/Chromium.app/Contents/MacOS/Chromium'),
  1116. path.join(versionDir, 'chrome-mac/Chromium.app/Contents/MacOS/Chromium'),
  1117. path.join(versionDir, 'chrome-mac-x64/Chromium.app/Contents/MacOS/Chromium'),
  1118. path.join(versionDir, 'chrome-linux64/chrome'),
  1119. path.join(versionDir, 'chrome-linux/chrome'),
  1120. ];
  1121. for (const c of candidates) {
  1122. if (fs.existsSync(c)) return c;
  1123. }
  1124. }
  1125. } catch (e) {}
  1126. return null;
  1127. };
  1128. // 3. Search fallback locations (Chromium first, then Chrome)
  1129. const fallbackLocations = [
  1130. // System Chromium
  1131. '/Applications/Chromium.app/Contents/MacOS/Chromium',
  1132. '/usr/bin/chromium',
  1133. '/usr/bin/chromium-browser',
  1134. // Puppeteer cache
  1135. path.join(process.env.HOME || '', '.cache/puppeteer/chromium'),
  1136. path.join(process.env.HOME || '', '.cache/puppeteer'),
  1137. // Chrome (fallback - extensions may not work in 137+)
  1138. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
  1139. '/usr/bin/google-chrome',
  1140. '/usr/bin/google-chrome-stable',
  1141. ];
  1142. for (const loc of fallbackLocations) {
  1143. // Check if it's a puppeteer cache dir
  1144. if (loc.includes('.cache/puppeteer')) {
  1145. const binary = findInPuppeteerDir(loc);
  1146. if (binary && validateBinary(binary)) {
  1147. return binary;
  1148. }
  1149. } else if (validateBinary(loc)) {
  1150. if (loc.includes('Google Chrome') || loc.includes('google-chrome')) {
  1151. console.error('[!] Warning: Using Chrome instead of Chromium. Extension loading may not work in Chrome 137+');
  1152. }
  1153. return loc;
  1154. }
  1155. }
  1156. return null;
  1157. }
// Public API of this module, grouped by concern.
module.exports = {
  // Environment helpers
  getEnv,
  getEnvBool,
  getEnvInt,
  getEnvArray,
  parseResolution,
  // PID file management
  writePidWithMtime,
  writeCmdScript,
  // Port management
  findFreePort,
  waitForDebugPort,
  // Zombie cleanup
  killZombieChrome,
  // Chrome launching
  launchChromium,
  killChrome,
  // Chrome/Chromium install
  installChromium,
  installPuppeteerCore,
  // Chrome/Chromium binary finding
  findChromium,
  // Extension utilities
  getExtensionId,
  loadExtensionManifest,
  installExtension,
  loadOrInstallExtension,
  isTargetExtension,
  loadExtensionFromTarget,
  installAllExtensions,
  loadAllExtensionsFromBrowser,
  // New puppeteer best-practices helpers
  getExtensionPaths,
  waitForExtensionTarget,
  getExtensionTargets,
  // Deprecated - use enableExtensions option instead
  getExtensionLaunchArgs,
};
  1198. // CLI usage
  1199. if (require.main === module) {
  1200. const args = process.argv.slice(2);
  1201. if (args.length === 0) {
  1202. console.log('Usage: chrome_utils.js <command> [args...]');
  1203. console.log('');
  1204. console.log('Commands:');
  1205. console.log(' findChromium');
  1206. console.log(' installChromium');
  1207. console.log(' installPuppeteerCore [npm_prefix]');
  1208. console.log(' launchChromium [output_dir] [extension_paths_json]');
  1209. console.log(' killChrome <pid> [output_dir]');
  1210. console.log(' killZombieChrome [data_dir]');
  1211. console.log(' getExtensionId <path>');
  1212. console.log(' loadExtensionManifest <path>');
  1213. console.log(' getExtensionLaunchArgs <extensions_json>');
  1214. console.log(' loadOrInstallExtension <webstore_id> <name> [extensions_dir]');
  1215. process.exit(1);
  1216. }
  1217. const [command, ...commandArgs] = args;
  1218. (async () => {
  1219. try {
  1220. switch (command) {
  1221. case 'findChromium': {
  1222. const binary = findChromium();
  1223. if (binary) {
  1224. console.log(binary);
  1225. } else {
  1226. console.error('Chromium binary not found');
  1227. process.exit(1);
  1228. }
  1229. break;
  1230. }
  1231. case 'installChromium': {
  1232. const result = await installChromium();
  1233. if (result.success) {
  1234. console.log(JSON.stringify({
  1235. binary: result.binary,
  1236. version: result.version,
  1237. }));
  1238. } else {
  1239. console.error(result.error);
  1240. process.exit(1);
  1241. }
  1242. break;
  1243. }
  1244. case 'installPuppeteerCore': {
  1245. const [npmPrefix] = commandArgs;
  1246. const result = await installPuppeteerCore({ npmPrefix: npmPrefix || undefined });
  1247. if (result.success) {
  1248. console.log(JSON.stringify({ path: result.path }));
  1249. } else {
  1250. console.error(result.error);
  1251. process.exit(1);
  1252. }
  1253. break;
  1254. }
  1255. case 'launchChromium': {
  1256. const [outputDir, extensionPathsJson] = commandArgs;
  1257. const extensionPaths = extensionPathsJson ? JSON.parse(extensionPathsJson) : [];
  1258. const result = await launchChromium({
  1259. outputDir: outputDir || 'chrome',
  1260. extensionPaths,
  1261. });
  1262. if (result.success) {
  1263. console.log(JSON.stringify({
  1264. cdpUrl: result.cdpUrl,
  1265. pid: result.pid,
  1266. port: result.port,
  1267. }));
  1268. } else {
  1269. console.error(result.error);
  1270. process.exit(1);
  1271. }
  1272. break;
  1273. }
  1274. case 'killChrome': {
  1275. const [pidStr, outputDir] = commandArgs;
  1276. const pid = parseInt(pidStr, 10);
  1277. if (isNaN(pid)) {
  1278. console.error('Invalid PID');
  1279. process.exit(1);
  1280. }
  1281. await killChrome(pid, outputDir);
  1282. break;
  1283. }
  1284. case 'killZombieChrome': {
  1285. const [dataDir] = commandArgs;
  1286. const killed = killZombieChrome(dataDir);
  1287. console.log(killed);
  1288. break;
  1289. }
  1290. case 'getExtensionId': {
  1291. const [unpacked_path] = commandArgs;
  1292. const id = getExtensionId(unpacked_path);
  1293. console.log(id);
  1294. break;
  1295. }
  1296. case 'loadExtensionManifest': {
  1297. const [unpacked_path] = commandArgs;
  1298. const manifest = loadExtensionManifest(unpacked_path);
  1299. console.log(JSON.stringify(manifest));
  1300. break;
  1301. }
  1302. case 'getExtensionLaunchArgs': {
  1303. const [extensions_json] = commandArgs;
  1304. const extensions = JSON.parse(extensions_json);
  1305. const launchArgs = getExtensionLaunchArgs(extensions);
  1306. console.log(JSON.stringify(launchArgs));
  1307. break;
  1308. }
  1309. case 'loadOrInstallExtension': {
  1310. const [webstore_id, name, extensions_dir] = commandArgs;
  1311. const ext = await loadOrInstallExtension({ webstore_id, name }, extensions_dir);
  1312. console.log(JSON.stringify(ext, null, 2));
  1313. break;
  1314. }
  1315. default:
  1316. console.error(`Unknown command: ${command}`);
  1317. process.exit(1);
  1318. }
  1319. } catch (error) {
  1320. console.error(`Error: ${error.message}`);
  1321. process.exit(1);
  1322. }
  1323. })();
  1324. }