chrome_utils.js 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997
  1. #!/usr/bin/env node
  2. /**
  3. * Chrome Extension Management Utilities
  4. *
  5. * Handles downloading, installing, and managing Chrome extensions for browser automation.
  6. * Ported from the TypeScript implementation in archivebox.ts
  7. */
  8. const fs = require('fs');
  9. const path = require('path');
  10. const crypto = require('crypto');
  11. const http = require('http');
  12. const net = require('net');
  13. const { exec, spawn } = require('child_process');
  14. const { promisify } = require('util');
  15. const { Readable } = require('stream');
  16. const { finished } = require('stream/promises');
  // Promise-returning wrapper around child_process.exec so commands can be awaited.
  const execAsync = promisify(exec);
  // Error message text for the case where no Chrome session is available.
  const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
  19. // ============================================================================
  20. // Environment helpers
  21. // ============================================================================
  /**
   * Get environment variable with default value.
   *
   * An unset or empty variable yields the default. The chosen value is
   * converted to a string before trimming, so a non-string default (for
   * example a number) does not throw.
   *
   * @param {string} name - Environment variable name
   * @param {string} [defaultValue=''] - Default value if not set (or empty)
   * @returns {string} - Trimmed environment variable value
   */
  function getEnv(name, defaultValue = '') {
    const chosen = process.env[name] || defaultValue;
    if (chosen === null || chosen === undefined) return '';
    return String(chosen).trim();
  }
  /**
   * Get boolean environment variable.
   *
   * Recognises (case-insensitively) 'true', '1', 'yes', 'on' as true and
   * 'false', '0', 'no', 'off' as false. Any other value, including an unset
   * variable, yields the default.
   *
   * @param {string} name - Environment variable name
   * @param {boolean} [defaultValue=false] - Default value if not set or unrecognised
   * @returns {boolean} - Boolean value
   */
  function getEnvBool(name, defaultValue = false) {
    const normalized = getEnv(name, '').toLowerCase();
    if (['true', '1', 'yes', 'on'].includes(normalized)) return true;
    if (['false', '0', 'no', 'off'].includes(normalized)) return false;
    return defaultValue;
  }
  /**
   * Get integer environment variable.
   *
   * The value is parsed with parseInt (base 10), so trailing non-digits are
   * ignored ("12px" parses as 12). A value that does not start with a number
   * yields the default.
   *
   * @param {string} name - Environment variable name
   * @param {number} [defaultValue=0] - Default value if not set or not numeric
   * @returns {number} - Integer value
   */
  function getEnvInt(name, defaultValue = 0) {
    const parsed = Number.parseInt(getEnv(name, String(defaultValue)), 10);
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }
  /**
   * Get array environment variable (JSON array or comma-separated string).
   *
   * Parsing strategy:
   * - If the value starts with '[', parse it as a JSON array
   * - Otherwise (or if the JSON is invalid), parse as comma-separated values
   *
   * This prevents incorrect splitting of arguments that contain internal commas.
   * For arguments with commas, use JSON format:
   *   CHROME_ARGS='["--user-data-dir=/path/with,comma", "--window-size=1440,900"]'
   *
   * JSON elements are converted to strings so the result always matches the
   * documented string[] type (e.g. '[1, "x"]' yields ['1', 'x']).
   *
   * @param {string} name - Environment variable name
   * @param {string[]} [defaultValue=[]] - Default value if not set
   * @returns {string[]} - Array of strings
   */
  function getEnvArray(name, defaultValue = []) {
    const raw = getEnv(name, '');
    if (!raw) return defaultValue;

    if (raw.startsWith('[')) {
      try {
        const parsed = JSON.parse(raw);
        if (Array.isArray(parsed)) return parsed.map((item) => String(item));
      } catch (e) {
        console.error(`[!] Failed to parse ${name} as JSON array: ${e.message}`);
        // Fall through to comma-separated parsing
      }
    }

    return raw
      .split(',')
      .map((part) => part.trim())
      .filter(Boolean);
  }
  /**
   * Parse resolution string into width/height.
   *
   * Accepts "WIDTH,HEIGHT" and also "WIDTHxHEIGHT". A missing, non-numeric or
   * non-positive dimension falls back to 1440 (width) / 2000 (height).
   *
   * @param {string} resolution - Resolution string like "1440,2000"
   * @returns {{width: number, height: number}} - Parsed dimensions
   */
  function parseResolution(resolution) {
    const text = typeof resolution === 'string' ? resolution : '';
    const [width, height] = text
      .split(/[,x]/i)
      .map((part) => Number.parseInt(part.trim(), 10));
    return {
      width: width > 0 ? width : 1440,
      height: height > 0 ? height : 2000,
    };
  }
  93. // ============================================================================
  94. // PID file management
  95. // ============================================================================
  /**
   * Write PID file with specific mtime for process validation.
   *
   * The file content is the PID as text; both the access and modification
   * times of the file are set to the given process start time.
   *
   * @param {string} filePath - Path to PID file
   * @param {number} pid - Process ID
   * @param {number} startTimeSeconds - Process start time in seconds
   */
  function writePidWithMtime(filePath, pid, startTimeSeconds) {
    fs.writeFileSync(filePath, String(pid));
    const startedAt = new Date(startTimeSeconds * 1000);
    fs.utimesSync(filePath, startedAt, startedAt);
  }
  /**
   * Write a shell script that can re-run the Chrome command.
   *
   * The binary and every argument are quoted for a POSIX shell: values made
   * only of safe characters are written verbatim, everything else is wrapped
   * in single quotes (with embedded single quotes escaped) so that spaces,
   * `$`, backticks, parentheses, semicolons etc. are never interpreted by
   * bash. The script is made executable (mode 0755).
   *
   * @param {string} filePath - Path to script file
   * @param {string} binary - Chrome binary path
   * @param {string[]} args - Chrome arguments
   */
  function writeCmdScript(filePath, binary, args) {
    const shellQuote = (value) => {
      const text = String(value);
      // Characters that bash never treats specially can stay unquoted.
      if (/^[A-Za-z0-9_@%+=:,.\/-]+$/.test(text)) return text;
      // Inside single quotes nothing is expanded; a literal ' is written as '\''.
      return `'${text.replace(/'/g, `'\\''`)}'`;
    };

    const commandLine = [binary, ...args].map(shellQuote).join(' ');
    fs.writeFileSync(filePath, `#!/bin/bash\n${commandLine}\n`);
    fs.chmodSync(filePath, 0o755);
  }
  124. // ============================================================================
  125. // Port management
  126. // ============================================================================
  /**
   * Find a free port on localhost.
   *
   * Asks the OS for an ephemeral port by listening on port 0 of the loopback
   * interface, then closes the probe server and reports the port it was given.
   * The probe binds 127.0.0.1 rather than all interfaces.
   *
   * @returns {Promise<number>} - Available port number
   */
  function findFreePort() {
    return new Promise((resolve, reject) => {
      const probe = net.createServer();
      // Do not let the probe keep the process alive on its own.
      probe.unref();
      probe.once('error', reject);
      probe.listen(0, '127.0.0.1', () => {
        const { port } = probe.address();
        probe.close(() => resolve(port));
      });
    });
  }
  /**
   * Wait for Chrome's DevTools port to be ready.
   *
   * Polls http://127.0.0.1:<port>/json/version until it returns parseable
   * JSON, retrying every 100ms. Each request is abandoned after 1000ms.
   * Exactly one retry is scheduled per failed attempt: a request-level timeout
   * destroys the request, which also emits 'error', and without the guard
   * below both handlers would schedule a retry and the number of concurrent
   * attempts would double on every timeout.
   *
   * @param {number} port - Debug port number
   * @param {number} [timeout=30000] - Timeout in milliseconds
   * @returns {Promise<Object>} - Chrome version info (parsed /json/version body)
   */
  function waitForDebugPort(port, timeout = 30000) {
    const retryDelayMs = 100;
    const requestTimeoutMs = 1000;
    const startTime = Date.now();

    return new Promise((resolve, reject) => {
      const attempt = () => {
        if (Date.now() - startTime > timeout) {
          reject(new Error(`Timeout waiting for Chrome debug port ${port}`));
          return;
        }

        // Guards this single attempt so it produces one outcome only.
        let attemptDone = false;
        const retry = () => {
          if (attemptDone) return;
          attemptDone = true;
          setTimeout(attempt, retryDelayMs);
        };

        const req = http.get(`http://127.0.0.1:${port}/json/version`, (res) => {
          let body = '';
          // Decode as UTF-8 across chunk boundaries.
          res.setEncoding('utf8');
          res.on('data', (chunk) => {
            body += chunk;
          });
          res.on('error', retry);
          // A connection dropped mid-response never emits 'end'; retry then.
          res.on('close', retry);
          res.on('end', () => {
            if (attemptDone) return;
            let info;
            try {
              info = JSON.parse(body);
            } catch (e) {
              retry();
              return;
            }
            attemptDone = true;
            resolve(info);
          });
        });

        req.on('error', retry);
        req.setTimeout(requestTimeoutMs, () => {
          req.destroy();
          retry();
        });
      };

      attempt();
    });
  }
  179. // ============================================================================
  180. // Zombie process cleanup
  181. // ============================================================================
  /**
   * Kill zombie Chrome processes from stale crawls.
   *
   * Recursively scans the data directory for `chrome/` directories containing
   * `*.pid` files. The parent of each `chrome/` directory is treated as the
   * crawl directory; if it was modified within the last five minutes the crawl
   * is considered active and skipped. For stale crawls, a PID whose process no
   * longer exists just has its PID file removed; a PID that is still running
   * is SIGKILLed (process group first, then the single process).
   *
   * Afterwards, SingletonLock entries under
   * `<dataDir>/personas/<name>/chrome_user_data/` are removed. The lock is
   * unlinked directly rather than tested with fs.existsSync first, because
   * existsSync follows symlinks and reports false for a lock that is a
   * dangling symlink.
   *
   * @param {string} [dataDir] - Data directory (defaults to DATA_DIR env or '.')
   * @returns {number} - Number of zombies killed
   */
  function killZombieChrome(dataDir = null) {
    const rootDir = dataDir || getEnv('DATA_DIR', '.');
    const staleBefore = Date.now() - 300000; // five minutes ago
    const maxDepth = 10;
    let killed = 0;

    console.error('[*] Checking for zombie Chrome processes...');
    if (!fs.existsSync(rootDir)) {
      console.error('[+] No data directory found');
      return 0;
    }

    /**
     * Recursively find all chrome/*.pid files in a directory tree.
     * @param {string} dir - Directory to search
     * @param {number} depth - Current recursion depth (limited to maxDepth)
     * @returns {Array<{pidFile: string, crawlDir: string}>} - PID file info
     */
    const findChromePidFiles = (dir, depth = 0) => {
      if (depth > maxDepth) return []; // Prevent runaway recursion
      let entries;
      try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
      } catch (e) {
        return []; // Skip if can't read directory
      }

      const found = [];
      for (const entry of entries) {
        if (!entry.isDirectory()) continue;
        const fullPath = path.join(dir, entry.name);

        if (entry.name === 'chrome') {
          // Parent of chrome/ is the crawl dir
          try {
            for (const fileName of fs.readdirSync(fullPath)) {
              if (fileName.endsWith('.pid')) {
                found.push({ pidFile: path.join(fullPath, fileName), crawlDir: dir });
              }
            }
          } catch (e) {
            // Skip if can't read chrome dir
          }
        } else if (!entry.name.startsWith('.') && entry.name !== 'node_modules') {
          // Recurse into subdirectory (skip hidden dirs and node_modules)
          found.push(...findChromePidFiles(fullPath, depth + 1));
        }
      }
      return found;
    };

    /** Remove a file, ignoring any error (best effort). */
    const removeQuietly = (filePath) => {
      try {
        fs.unlinkSync(filePath);
        return true;
      } catch (e) {
        return false;
      }
    };

    try {
      for (const { pidFile, crawlDir } of findChromePidFiles(rootDir)) {
        // Check if crawl was modified recently (still active)
        let crawlMtimeMs;
        try {
          crawlMtimeMs = fs.statSync(crawlDir).mtimeMs;
        } catch (e) {
          continue;
        }
        if (crawlMtimeMs > staleBefore) continue;

        // Crawl is stale, read the PID
        let pid;
        try {
          pid = Number.parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
        } catch (e) {
          continue; // Skip unreadable PID files
        }
        // PID 1 is excluded too: kill(-1, ...) would signal every process.
        if (Number.isNaN(pid) || pid <= 1) continue;

        // Check if process exists (signal 0 probes without killing)
        try {
          process.kill(pid, 0);
        } catch (e) {
          // EPERM: the process exists but belongs to someone else; leave it.
          if (e.code === 'EPERM') continue;
          // Process dead, remove stale PID file
          removeQuietly(pidFile);
          continue;
        }

        // Process alive and crawl is stale - zombie!
        console.error(`[!] Found zombie (PID ${pid}) from stale crawl ${path.basename(crawlDir)}`);
        try {
          try {
            process.kill(-pid, 'SIGKILL');
          } catch (e) {
            process.kill(pid, 'SIGKILL');
          }
          killed++;
          console.error(`[+] Killed zombie (PID ${pid})`);
          removeQuietly(pidFile);
        } catch (e) {
          console.error(`[!] Failed to kill PID ${pid}: ${e.message}`);
        }
      }
    } catch (e) {
      console.error(`[!] Error scanning for Chrome processes: ${e.message}`);
    }

    if (killed > 0) {
      console.error(`[+] Killed ${killed} zombie process(es)`);
    } else {
      console.error('[+] No zombies found');
    }

    // Clean up stale SingletonLock files from persona chrome_user_data directories
    const personasDir = path.join(rootDir, 'personas');
    if (fs.existsSync(personasDir)) {
      try {
        for (const persona of fs.readdirSync(personasDir, { withFileTypes: true })) {
          if (!persona.isDirectory()) continue;
          const singletonLock = path.join(
            personasDir,
            persona.name,
            'chrome_user_data',
            'SingletonLock'
          );
          // Unlink directly: works for regular files and dangling symlinks,
          // and a missing lock simply fails with ENOENT, which is ignored.
          if (removeQuietly(singletonLock)) {
            console.error(`[+] Removed stale SingletonLock: ${singletonLock}`);
          }
        }
      } catch (e) {
        // Ignore errors scanning personas directory
      }
    }

    return killed;
  }
  309. // ============================================================================
  310. // Chrome launching
  311. // ============================================================================
  /**
   * Launch Chromium with extensions and return connection info.
   *
   * Steps: optionally kill zombies, prepare the output and user-data
   * directories (removing a leftover SingletonLock and setting the download
   * directory preference when CHROME_DOWNLOADS_DIR is set), pick a free debug
   * port, assemble the argument list, write `cmd.sh`, spawn Chromium detached,
   * and wait for the DevTools endpoint. On success `chrome.pid`, `cdp_url.txt`
   * and `port.txt` are written to the output directory.
   *
   * If the spawn fails or the debug port never becomes ready, the spawned
   * process (if any) is killed and `chrome.pid` is removed, so a failed launch
   * does not leave an orphaned browser behind.
   *
   * @param {Object} options - Launch options
   * @param {string} [options.binary] - Chrome binary path (auto-detected if not provided)
   * @param {string} [options.outputDir='chrome'] - Directory for output files
   * @param {string} [options.userDataDir] - Chrome user data directory for persistent sessions
   * @param {string} [options.resolution='1440,2000'] - Window resolution
   * @param {string} [options.userAgent] - User agent (defaults to CHROME_USER_AGENT / USER_AGENT env)
   * @param {boolean} [options.headless=true] - Run in headless mode
   * @param {boolean} [options.sandbox=true] - Enable Chrome sandbox
   * @param {boolean} [options.checkSsl=true] - Check SSL certificates
   * @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
   * @param {boolean} [options.killZombies=true] - Kill zombie processes first
   * @returns {Promise<Object>} - {success, cdpUrl, pid, port, process, error}
   */
  async function launchChromium(options = {}) {
    const {
      binary = findChromium(),
      outputDir = 'chrome',
      userDataDir = getEnv('CHROME_USER_DATA_DIR'),
      resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
      userAgent = getEnv('CHROME_USER_AGENT') || getEnv('USER_AGENT', ''),
      headless = getEnvBool('CHROME_HEADLESS', true),
      sandbox = getEnvBool('CHROME_SANDBOX', true),
      checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
      extensionPaths = [],
      killZombies = true,
    } = options;

    if (!binary) {
      return { success: false, error: 'Chrome binary not found' };
    }

    const downloadsDir = getEnv('CHROME_DOWNLOADS_DIR');

    // Kill zombies first
    if (killZombies) {
      killZombieChrome();
    }

    const { width, height } = parseResolution(resolution);

    // Create output directory
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    // Create user data directory if specified and doesn't exist
    if (userDataDir) {
      if (!fs.existsSync(userDataDir)) {
        fs.mkdirSync(userDataDir, { recursive: true });
        console.error(`[*] Created user data directory: ${userDataDir}`);
      }

      // Clean up any stale SingletonLock from previous crashed sessions.
      // lstat (not existsSync) is used so that a lock which is a dangling
      // symlink is still detected; existsSync follows the link and says false.
      const singletonLock = path.join(userDataDir, 'SingletonLock');
      let lockPresent = false;
      try {
        fs.lstatSync(singletonLock);
        lockPresent = true;
      } catch (e) {
        lockPresent = false;
      }
      if (lockPresent) {
        try {
          fs.unlinkSync(singletonLock);
          console.error(`[*] Removed stale SingletonLock: ${singletonLock}`);
        } catch (e) {
          console.error(`[!] Failed to remove SingletonLock: ${e.message}`);
        }
      }

      if (downloadsDir) {
        try {
          const defaultProfileDir = path.join(userDataDir, 'Default');
          const prefsPath = path.join(defaultProfileDir, 'Preferences');
          fs.mkdirSync(defaultProfileDir, { recursive: true });

          // Start from the existing preferences when they are readable JSON.
          let prefs = {};
          if (fs.existsSync(prefsPath)) {
            try {
              prefs = JSON.parse(fs.readFileSync(prefsPath, 'utf-8'));
            } catch (e) {
              prefs = {};
            }
          }
          prefs.download = prefs.download || {};
          prefs.download.default_directory = downloadsDir;
          prefs.download.prompt_for_download = false;
          fs.writeFileSync(prefsPath, JSON.stringify(prefs));
          console.error(`[*] Set Chrome download directory: ${downloadsDir}`);
        } catch (e) {
          console.error(`[!] Failed to set Chrome download directory: ${e.message}`);
        }
      }
    }

    // Find a free port
    const debugPort = await findFreePort();
    console.error(`[*] Using debug port: ${debugPort}`);

    // Base Chrome args (CHROME_ARGS env var) and extra user-provided args
    const baseArgs = getEnvArray('CHROME_ARGS', []);
    const extraArgs = getEnvArray('CHROME_ARGS_EXTRA', []);

    // Build dynamic Chrome arguments (these must be computed at runtime)
    const inDocker = getEnvBool('IN_DOCKER', false);
    const dynamicArgs = [
      // Remote debugging setup
      `--remote-debugging-port=${debugPort}`,
      '--remote-debugging-address=127.0.0.1',
      // Sandbox settings (disable in Docker)
      // NOTE(review): with sandbox=false outside Docker no flag is added, so
      // the sandbox stays enabled - confirm this is intended.
      ...(sandbox ? [] : (inDocker ? ['--no-sandbox', '--disable-setuid-sandbox'] : [])),
      // Docker-specific workarounds
      '--disable-dev-shm-usage',
      // Window size
      `--window-size=${width},${height}`,
      // User data directory (for persistent sessions with persona)
      ...(userDataDir ? [`--user-data-dir=${userDataDir}`] : []),
      // User agent
      ...(userAgent ? [`--user-agent=${userAgent}`] : []),
      // Headless mode
      ...(headless ? ['--headless=new'] : []),
      // SSL certificate checking
      ...(checkSsl ? [] : ['--ignore-certificate-errors']),
    ];

    // Combine all args: base (from config) + dynamic (runtime) + extra (user overrides)
    const chromiumArgs = [...baseArgs, ...dynamicArgs, ...extraArgs];

    // Ensure keychain prompts are disabled on macOS
    if (!chromiumArgs.includes('--use-mock-keychain')) {
      chromiumArgs.push('--use-mock-keychain');
    }

    // Add extension loading flags
    if (extensionPaths.length > 0) {
      const extPathsArg = extensionPaths.join(',');
      chromiumArgs.push(`--load-extension=${extPathsArg}`);
      chromiumArgs.push('--enable-unsafe-extension-debugging');
      chromiumArgs.push('--disable-features=DisableLoadExtensionCommandLineSwitch,ExtensionManifestV2Unsupported,ExtensionManifestV2Disabled');
      console.error(`[*] Loading ${extensionPaths.length} extension(s) via --load-extension`);
    }

    chromiumArgs.push('about:blank');

    // Write command script for debugging
    writeCmdScript(path.join(outputDir, 'cmd.sh'), binary, chromiumArgs);

    let chromiumProcess = null;
    try {
      console.error(`[*] Spawning Chromium (headless=${headless})...`);
      chromiumProcess = spawn(binary, chromiumArgs, {
        stdio: ['ignore', 'pipe', 'pipe'],
        detached: true,
      });

      // A failed spawn (e.g. missing or non-executable binary) is reported via
      // an asynchronous 'error' event. Without a listener that event would be
      // thrown as an uncaught exception; here it fails the launch instead.
      const spawnFailure = new Promise((_, reject) => {
        chromiumProcess.on('error', reject);
      });

      const chromePid = chromiumProcess.pid;
      const chromeStartTime = Date.now() / 1000;
      if (chromePid) {
        console.error(`[*] Chromium spawned (PID: ${chromePid})`);
        writePidWithMtime(path.join(outputDir, 'chrome.pid'), chromePid, chromeStartTime);
      }

      // Pipe Chrome output to stderr
      chromiumProcess.stdout.on('data', (data) => {
        process.stderr.write(`[chromium:stdout] ${data}`);
      });
      chromiumProcess.stderr.on('data', (data) => {
        process.stderr.write(`[chromium:stderr] ${data}`);
      });

      // Wait for debug port (or for the spawn to fail, whichever comes first)
      console.error(`[*] Waiting for debug port ${debugPort}...`);
      const versionInfo = await Promise.race([
        waitForDebugPort(debugPort, 30000),
        spawnFailure,
      ]);
      const wsUrl = versionInfo.webSocketDebuggerUrl;
      console.error(`[+] Chromium ready: ${wsUrl}`);

      fs.writeFileSync(path.join(outputDir, 'cdp_url.txt'), wsUrl);
      fs.writeFileSync(path.join(outputDir, 'port.txt'), String(debugPort));

      return {
        success: true,
        cdpUrl: wsUrl,
        pid: chromePid,
        port: debugPort,
        process: chromiumProcess,
      };
    } catch (e) {
      // The caller gets no pid/process on failure, so clean up here.
      if (chromiumProcess && chromiumProcess.pid) {
        try {
          process.kill(-chromiumProcess.pid, 'SIGKILL');
        } catch (groupErr) {
          try {
            chromiumProcess.kill('SIGKILL');
          } catch (killErr) {
            // Already gone
          }
        }
        try {
          fs.unlinkSync(path.join(outputDir, 'chrome.pid'));
        } catch (unlinkErr) {
          // Nothing to remove
        }
      }
      return { success: false, error: `${e.name}: ${e.message}` };
    }
  }
  /**
   * Check if a process is still running.
   *
   * Uses signal 0, which performs the existence/permission check without
   * delivering a signal. EPERM means the process exists but may not be
   * signalled by this user, so it counts as alive. Values that are not a
   * positive integer PID return false (PID 0 and negative values would address
   * process groups rather than a single process).
   *
   * @param {number} pid - Process ID to check
   * @returns {boolean} - True if process exists
   */
  function isProcessAlive(pid) {
    const target = Number(pid);
    if (!Number.isInteger(target) || target <= 0) return false;
    try {
      process.kill(target, 0); // Signal 0 checks existence without killing
      return true;
    } catch (e) {
      return e.code === 'EPERM';
    }
  }
  /**
   * Find all Chrome child processes for a given debug port.
   *
   * Lists processes with `ps` (run directly, without a shell) and keeps those
   * whose command line mentions chrome/chromium followed by
   * `--remote-debugging-port=<port>` as a complete value, so port 9222 does
   * not match 92221. A port that is not an integer in 1..65535 yields an empty
   * list. Any failure running `ps` also yields an empty list.
   *
   * @param {number} port - Debug port number
   * @returns {Array<number>} - Array of PIDs
   */
  function findChromeProcessesByPort(port) {
    const { execFileSync } = require('child_process');

    const portNumber = Number(port);
    if (!Number.isInteger(portNumber) || portNumber <= 0 || portNumber > 65535) {
      return [];
    }

    const commandPattern = new RegExp(
      `chrom(e|ium).*--remote-debugging-port=${portNumber}(\\s|$)`,
      'i'
    );
    const pids = [];
    try {
      // "pid=" / "args=" suppress the header line; one process per output line.
      const output = execFileSync('ps', ['-A', '-o', 'pid=', '-o', 'args='], {
        encoding: 'utf8',
        timeout: 5000,
        maxBuffer: 16 * 1024 * 1024,
      });
      for (const line of output.split('\n')) {
        const match = /^\s*(\d+)\s+(.*)$/.exec(line);
        if (!match || !commandPattern.test(match[2])) continue;
        const pid = Number.parseInt(match[1], 10);
        if (pid > 0) pids.push(pid);
      }
    } catch (e) {
      // Command failed or no processes found
    }
    return pids;
  }
  /**
   * Kill a Chrome process by PID.
   * Always sends SIGTERM before SIGKILL, then verifies death.
   *
   * Sequence: SIGTERM to the process group (falling back to the single
   * process), wait up to 2s for it to exit, then SIGKILL the group (same
   * fallback) and wait up to 1s. The waits poll every 100ms and return as soon
   * as the process is gone. If the process still exists and `port.txt` in
   * `outputDir` names a debug port, every other Chrome process found for that
   * port is SIGKILLed as a last resort. Finally `chrome.pid` in `outputDir` is
   * removed.
   *
   * A PID that is not an integer greater than 1 is never signalled: negating
   * PID 1 would address every process the user may signal.
   *
   * @param {number} pid - Process ID to kill
   * @param {string} [outputDir] - Directory containing PID files to clean up
   */
  async function killChrome(pid, outputDir = null) {
    if (!pid) return;

    const targetPid = Number(pid);
    const canSignal = Number.isInteger(targetPid) && targetPid > 1;

    console.error(`[*] Killing Chrome process tree (PID ${pid})...`);

    // Get debug port for finding child processes
    let debugPort = null;
    if (outputDir) {
      try {
        const portFile = path.join(outputDir, 'port.txt');
        if (fs.existsSync(portFile)) {
          debugPort = parseInt(fs.readFileSync(portFile, 'utf8').trim(), 10);
        }
      } catch (e) {
        // Best effort: the port is only needed for the last-resort cleanup
      }
    }

    // Poll until the process is gone or the time budget is used up.
    const waitForExit = async (timeoutMs) => {
      const deadline = Date.now() + timeoutMs;
      while (isProcessAlive(targetPid)) {
        if (Date.now() >= deadline) return false;
        await new Promise((resolve) => setTimeout(resolve, 100));
      }
      return true;
    };

    if (!canSignal) {
      console.error(`[!] Refusing to signal invalid PID ${pid}`);
    } else {
      // Step 1: SIGTERM to process group (graceful shutdown)
      console.error(`[*] Sending SIGTERM to process group -${pid}...`);
      try {
        process.kill(-targetPid, 'SIGTERM');
      } catch (e) {
        try {
          console.error(`[*] Process group kill failed, trying single process...`);
          process.kill(targetPid, 'SIGTERM');
        } catch (e2) {
          console.error(`[!] SIGTERM failed: ${e2.message}`);
        }
      }

      // Step 2 + 3: Wait for graceful shutdown and check if still alive
      if (await waitForExit(2000)) {
        console.error('[+] Chrome process terminated gracefully');
      } else {
        // Step 4: Force kill ENTIRE process group with SIGKILL
        console.error(`[*] Process still alive, sending SIGKILL to process group -${pid}...`);
        try {
          process.kill(-targetPid, 'SIGKILL'); // Kill entire process group
        } catch (e) {
          console.error(`[!] Process group SIGKILL failed, trying single process: ${e.message}`);
          try {
            process.kill(targetPid, 'SIGKILL');
          } catch (e2) {
            console.error(`[!] SIGKILL failed: ${e2.message}`);
          }
        }

        // Step 5: Wait briefly and verify death
        if (await waitForExit(1000)) {
          console.error('[+] Chrome process group killed successfully');
        } else {
          console.error(`[!] WARNING: Process ${pid} is unkillable (likely in UNE state)`);
          console.error(`[!] This typically happens when Chrome crashes in kernel syscall`);
          console.error(`[!] Process will remain as zombie until system reboot`);
          console.error(`[!] macOS IOSurface crash creates unkillable processes in UNE state`);

          // Last resort: kill every other Chrome process tied to the debug port
          if (debugPort) {
            const relatedPids = new Set(
              findChromeProcessesByPort(debugPort).filter(
                (relatedPid) => relatedPid !== targetPid && relatedPid > 1
              )
            );
            if (relatedPids.size > 0) {
              console.error(`[*] Found ${relatedPids.size} Chrome processes still running on port ${debugPort}`);
              console.error(`[*] Attempting final process group SIGKILL...`);
              for (const relatedPid of relatedPids) {
                try {
                  process.kill(-relatedPid, 'SIGKILL');
                } catch (e) {
                  // Not a group leader: fall back to the single process
                  try {
                    process.kill(relatedPid, 'SIGKILL');
                  } catch (e2) {
                    // Already gone or not permitted
                  }
                }
              }
            }
          }
        }
      }
    }

    // Step 6: Clean up PID files
    // Note: hook-specific .pid files are cleaned up by run_hook() and Snapshot.cleanup()
    if (outputDir) {
      try {
        fs.unlinkSync(path.join(outputDir, 'chrome.pid'));
      } catch (e) {
        // PID file already absent
      }
    }

    console.error('[*] Chrome cleanup completed');
  }
  /**
   * Install Chromium using @puppeteer/browsers programmatic API.
   *
   * If CHROME_BINARY points at an existing file it is returned as-is and
   * nothing is installed. Otherwise @puppeteer/browsers is loaded (also
   * searching NODE_MODULES_DIR when set), the requested build tag is resolved
   * to a concrete build id, and Chromium is downloaded into the cache
   * directory.
   *
   * The cache directory is `options.cacheDir`, else PUPPETEER_CACHE_DIR, else
   * `~/.cache/puppeteer`. NOTE(review): the API reference lists `cacheDir` as
   * a required install option and build tags as needing resolveBuildId() -
   * confirm against the installed @puppeteer/browsers version.
   *
   * @param {Object} options - Install options
   * @param {string} [options.cacheDir] - Directory to download browsers into
   * @param {string} [options.buildId='latest'] - Build id or tag to install
   * @returns {Promise<Object>} - {success, binary, version, error}
   */
  async function installChromium(options = {}) {
    // Check if CHROME_BINARY is already set and valid
    const configuredBinary = getEnv('CHROME_BINARY');
    if (configuredBinary && fs.existsSync(configuredBinary)) {
      console.error(`[+] Using configured CHROME_BINARY: ${configuredBinary}`);
      return { success: true, binary: configuredBinary, version: null };
    }

    // Try to load @puppeteer/browsers from NODE_MODULES_DIR or system
    let puppeteerBrowsers;
    try {
      const extraModulesDir = process.env.NODE_MODULES_DIR;
      // Register the extra search path once, not on every call.
      if (extraModulesDir && !module.paths.includes(extraModulesDir)) {
        module.paths.unshift(extraModulesDir);
      }
      puppeteerBrowsers = require('@puppeteer/browsers');
    } catch (e) {
      console.error(`[!] @puppeteer/browsers not found. Install it first with installPuppeteerCore.`);
      return { success: false, error: '@puppeteer/browsers not installed' };
    }

    console.error(`[*] Installing Chromium via @puppeteer/browsers...`);
    try {
      const browser = 'chromium';
      const cacheDir =
        options.cacheDir ||
        getEnv('PUPPETEER_CACHE_DIR') ||
        path.join(require('os').homedir(), '.cache', 'puppeteer');

      const platform =
        typeof puppeteerBrowsers.detectBrowserPlatform === 'function'
          ? puppeteerBrowsers.detectBrowserPlatform()
          : undefined;

      // Turn a tag such as "latest" into a concrete build id when possible.
      let buildId = options.buildId || 'latest';
      if (platform && typeof puppeteerBrowsers.resolveBuildId === 'function') {
        buildId = await puppeteerBrowsers.resolveBuildId(browser, platform, buildId);
      }

      const result = await puppeteerBrowsers.install({
        browser,
        buildId,
        cacheDir,
        ...(platform ? { platform } : {}),
      });

      const binary = result.executablePath;
      const version = result.buildId;
      if (!binary || !fs.existsSync(binary)) {
        console.error(`[!] Chromium binary not found at: ${binary}`);
        return { success: false, error: `Chromium binary not found at: ${binary}` };
      }

      console.error(`[+] Chromium installed: ${binary}`);
      return { success: true, binary, version };
    } catch (e) {
      console.error(`[!] Failed to install Chromium: ${e.message}`);
      return { success: false, error: e.message };
    }
  }
  /**
   * Install the puppeteer-core npm package into an npm prefix directory.
   *
   * Does nothing if <npmPrefix>/node_modules/puppeteer-core already exists.
   * Failures (including failure to create the prefix directory) are reported
   * through the returned object, not thrown.
   *
   * @param {Object} options - Install options
   * @param {string} [options.npmPrefix] - npm prefix directory
   *   (default: <LIB_DIR, else DATA_DIR, else '.'>/npm)
   * @param {number} [options.timeout=60000] - Timeout in milliseconds
   * @returns {Promise<Object>} - {success, path, error}
   */
  async function installPuppeteerCore(options = {}) {
    const defaultPrefix = path.join(getEnv('LIB_DIR', getEnv('DATA_DIR', '.')), 'npm');
    const {
      npmPrefix = defaultPrefix,
      timeout = 60000,
    } = options;

    const puppeteerPath = path.join(npmPrefix, 'node_modules', 'puppeteer-core');

    // Check if already installed
    if (fs.existsSync(puppeteerPath)) {
      console.error(`[+] puppeteer-core already installed: ${puppeteerPath}`);
      return { success: true, path: puppeteerPath };
    }

    console.error(`[*] Installing puppeteer-core to ${npmPrefix}...`);

    try {
      // mkdir with recursive:true is a no-op when the directory already exists
      fs.mkdirSync(npmPrefix, { recursive: true });

      // Pass arguments as an array so the prefix is never re-parsed by a shell
      // (spaces, quotes or `$` in the path are safe).
      // NOTE(review): runs `npm` directly without a shell; assumes a POSIX-style `npm` executable on PATH.
      const { execFileSync } = require('child_process');
      execFileSync(
        'npm',
        ['install', '--prefix', npmPrefix, 'puppeteer-core'],
        { encoding: 'utf8', timeout, stdio: ['pipe', 'pipe', 'pipe'] }
      );

      console.error(`[+] puppeteer-core installed successfully`);
      return { success: true, path: puppeteerPath };
    } catch (e) {
      console.error(`[!] Failed to install puppeteer-core: ${e.message}`);
      return { success: false, error: e.message };
    }
  }
  // Optional dependency: when the `unzipper` package can be loaded, `unzip` becomes an
  // async (sourcePath, destPath) extractor; otherwise it stays null and callers
  // rely on the system unzip command alone.
  let unzip = null;
  try {
    const unzipper = require('unzipper');
    unzip = async (sourcePath, destPath) => {
      const archive = fs.createReadStream(sourcePath);
      const extraction = archive.pipe(unzipper.Extract({ path: destPath }));
      return extraction.promise();
    };
  } catch (err) {
    // unzipper is not available: leave `unzip` as null
  }
  /**
   * Derive the extension ID for an unpacked extension directory.
   *
   * The ID is computed from the SHA-256 digest of the directory path (the real path
   * when it can be resolved, the given path otherwise): each of the first 32 hex
   * digits is mapped onto the letters 'a'..'p'. Per the original author's note this
   * mirrors how Chrome assigns IDs to unpacked extensions.
   *
   * @param {string} unpacked_path - Path to the unpacked extension directory
   * @returns {string} - 32-character extension ID made of the letters a-p
   */
  function getExtensionId(unpacked_path) {
    // Prefer the canonical path; keep the caller's path if it cannot be resolved
    let canonicalPath;
    try {
      canonicalPath = fs.realpathSync(unpacked_path);
    } catch (err) {
      canonicalPath = unpacked_path;
    }

    const hexDigest = crypto
      .createHash('sha256')
      .update(Buffer.from(canonicalPath, 'utf-8'))
      .digest('hex');

    // Hex digit 0..15 -> letter 'a'..'p'
    const firstLetterCode = 'a'.charCodeAt(0);
    let extensionId = '';
    for (const hexDigit of hexDigest.slice(0, 32)) {
      extensionId += String.fromCharCode(firstLetterCode + parseInt(hexDigit, 16));
    }
    return extensionId;
  }
  /**
   * Download and install a Chrome extension from the Chrome Web Store.
   *
   * Steps:
   *  1. If neither the unpacked manifest nor the CRX file exists, download the CRX.
   *     The download is streamed to "<crx_path>.part" and renamed into place only
   *     after it completed, so an interrupted download never leaves a truncated
   *     CRX that later runs would mistake for a finished one.
   *  2. Extract the CRX into unpacked_path with /usr/bin/unzip, falling back to the
   *     optional `unzipper` library when unzip fails and no manifest was produced.
   *  3. Report success only if manifest.json exists in unpacked_path.
   *
   * @param {Object} extension - Extension metadata object
   * @param {string} extension.webstore_id - Chrome Web Store extension ID
   * @param {string} extension.name - Human-readable extension name
   * @param {string} extension.crx_url - URL to download the CRX file
   * @param {string} extension.crx_path - Local path to save the CRX file
   * @param {string} extension.unpacked_path - Path to extract the extension
   * @returns {Promise<boolean>} - True if installation succeeded
   */
  async function installExtension(extension) {
    const manifest_path = path.join(extension.unpacked_path, 'manifest.json');

    // Download CRX file if not already downloaded
    if (!fs.existsSync(manifest_path) && !fs.existsSync(extension.crx_path)) {
      console.log(`[🛠️] Downloading missing extension ${extension.name} ${extension.webstore_id} -> ${extension.crx_path}`);

      const partial_path = `${extension.crx_path}.part`;
      try {
        // Ensure parent directory exists
        await fs.promises.mkdir(path.dirname(extension.crx_path), { recursive: true });

        // Download CRX file from Chrome Web Store
        const response = await fetch(extension.crx_url);
        if (!response.ok) {
          console.warn(`[⚠️] Failed to download extension ${extension.name}: HTTP ${response.status}`);
          return false;
        }
        if (!response.body) {
          console.warn(`[⚠️] Failed to download extension ${extension.name}: No response body`);
          return false;
        }

        // pipeline() rejects on errors from either stream (a bare pipe() would not
        // forward a network error to the file stream and could wait forever)
        const { pipeline } = require('stream/promises');
        await pipeline(Readable.fromWeb(response.body), fs.createWriteStream(partial_path));
        await fs.promises.rename(partial_path, extension.crx_path);
      } catch (err) {
        // Best-effort removal of the incomplete download
        await fs.promises.rm(partial_path, { force: true }).catch(() => {});
        console.error(`[❌] Failed to download extension ${extension.name}:`, err);
        return false;
      }
    }

    // Unzip CRX file to unpacked_path (CRX files have extra header bytes but unzip handles it)
    await fs.promises.mkdir(extension.unpacked_path, { recursive: true });
    try {
      // Use -q to suppress warnings about extra bytes in CRX header.
      // The paths are interpolated into a shell command line inside double quotes.
      await execAsync(`/usr/bin/unzip -q -o "${extension.crx_path}" -d "${extension.unpacked_path}"`);
    } catch (err1) {
      // unzip may return non-zero even on success due to CRX header warning, check if manifest exists
      if (!fs.existsSync(manifest_path)) {
        if (!unzip) {
          console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err1.message);
          return false;
        }
        // Fallback to unzipper library
        try {
          await unzip(extension.crx_path, extension.unpacked_path);
        } catch (err2) {
          console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err2.message);
          return false;
        }
      }
    }

    if (!fs.existsSync(manifest_path)) {
      console.error(`[❌] Failed to install ${extension.crx_path}: could not find manifest.json in unpacked_path`);
      return false;
    }

    return true;
  }
  /**
   * Load or install a Chrome extension, computing all metadata.
   *
   * Fills in missing fields on `ext` in place (webstore_url, crx_url, crx_path,
   * unpacked_path, read_manifest, read_version, id, version) and installs the
   * extension via installExtension() when no version can be read from its manifest.
   *
   * `ext.read_version()` returns null when manifest.json is missing, unreadable,
   * invalid JSON or has no version, so a corrupt manifest is treated as
   * "not installed" instead of throwing. `ext.read_manifest()` still throws on a
   * missing/invalid manifest.
   *
   * @param {Object} ext - Partial extension metadata (at minimum: webstore_id or unpacked_path)
   * @param {string} [ext.webstore_id] - Chrome Web Store extension ID
   * @param {string} [ext.name] - Human-readable extension name
   * @param {string} [ext.unpacked_path] - Path to unpacked extension
   * @param {string} [extensions_dir] - Directory to store extensions
   * @returns {Promise<Object>} - The same `ext` object, completed
   * @throws {Error} If neither webstore_id nor unpacked_path is provided
   */
  async function loadOrInstallExtension(ext, extensions_dir = null) {
    if (!(ext.webstore_id || ext.unpacked_path)) {
      throw new Error('Extension must have either {webstore_id} or {unpacked_path}');
    }

    // Determine extensions directory
    // Use provided dir, or fall back to getExtensionsDir() which handles env vars and defaults
    const EXTENSIONS_DIR = extensions_dir || getExtensionsDir();

    // Set statically computable extension metadata
    ext.webstore_id = ext.webstore_id || ext.id;
    ext.name = ext.name || ext.webstore_id;
    ext.webstore_url = ext.webstore_url || `https://chromewebstore.google.com/detail/${ext.webstore_id}`;
    ext.crx_url = ext.crx_url || `https://clients2.google.com/service/update2/crx?response=redirect&prodversion=1230&acceptformat=crx3&x=id%3D${ext.webstore_id}%26uc`;
    ext.crx_path = ext.crx_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}.crx`);
    ext.unpacked_path = ext.unpacked_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}`);

    const manifest_path = path.join(ext.unpacked_path, 'manifest.json');
    ext.read_manifest = () => JSON.parse(fs.readFileSync(manifest_path, 'utf-8'));
    ext.read_version = () => {
      try {
        return ext.read_manifest()?.version || null;
      } catch (err) {
        // Missing, unreadable or malformed manifest.json: no usable version
        return null;
      }
    };

    // If extension is not installed, download and unpack it
    if (!ext.read_version()) {
      await installExtension(ext);
    }

    // Autodetect ID from filesystem path (unpacked extensions don't have stable IDs)
    ext.id = getExtensionId(ext.unpacked_path);
    ext.version = ext.read_version();

    if (!ext.version) {
      console.warn(`[❌] Unable to detect ID and version of installed extension ${ext.unpacked_path}`);
    } else {
      console.log(`[➕] Installed extension ${ext.name} (${ext.version})... ${ext.unpacked_path}`);
    }

    return ext;
  }
  /**
   * Inspect a Puppeteer target and report whether it belongs to a Chrome extension.
   *
   * A target counts as an extension target when its URL starts with
   * "chrome-extension://", and as a background target when it is additionally of
   * type "background_page" or "service_worker". For extension targets the ID is
   * taken from the URL host and, when a worker/page context is available, the
   * manifest is read through chrome.runtime.getManifest(); failures while reading
   * that metadata are ignored and leave the remaining fields null.
   *
   * A target that disappears during inspection ("No target with given id found")
   * is reported with type 'closed' and URL 'about:closed'; any other error is rethrown.
   *
   * Note: when no URL can be determined, target_is_extension and target_is_bg are
   * falsy but not necessarily `false`.
   *
   * @param {Object} target - Puppeteer target object
   * @returns {Promise<Object>} - {target_is_extension, target_is_bg, target_type, target_ctx,
   *   target_url, extension_id, manifest_version, manifest, manifest_name}
   */
  async function isTargetExtension(target) {
    const describeTarget = async () => {
      try {
        const type = target.type();
        const ctx = (await target.worker()) || (await target.page()) || null;
        const url = target.url() || ctx?.url() || null;
        return { type, ctx, url };
      } catch (err) {
        if (!String(err).includes('No target with given id found')) {
          throw err;
        }
        // Target closed during check, ignore harmless race condition
        return { type: 'closed', ctx: null, url: 'about:closed' };
      }
    };

    const { type: target_type, ctx: target_ctx, url: target_url } = await describeTarget();

    // Background contexts are extension targets of type background_page / service_worker
    const is_chrome_extension = target_url?.startsWith('chrome-extension://');
    const target_is_bg =
      is_chrome_extension && ['background_page', 'service_worker'].includes(target_type);
    const target_is_extension = is_chrome_extension || target_is_bg;

    const metadata = {
      extension_id: null,
      manifest_version: null,
      manifest: null,
      manifest_name: null,
    };

    if (target_is_extension) {
      try {
        // chrome-extension://<id>/path -> <id>
        metadata.extension_id = target_url?.split('://')[1]?.split('/')[0] || null;
        if (target_ctx) {
          const manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
          metadata.manifest = manifest;
          metadata.manifest_version = manifest?.manifest_version || null;
          metadata.manifest_name = manifest?.name || null;
        }
      } catch (err) {
        // Failed to get extension metadata
      }
    }

    return {
      target_is_extension,
      target_is_bg,
      target_type,
      target_ctx,
      target_url,
      ...metadata,
    };
  }
  /**
   * Attach runtime information and dispatch helpers to the known extension that
   * owns a browser target.
   *
   * Only extension background targets (as classified by isTargetExtension) with a
   * usable context are considered. The matching entry of `extensions` (matched by
   * `id`) is updated in place with the target, its type/URL, the manifest read
   * from the extension context, and three helpers that run inside that context:
   *  - dispatchAction(tab): fire the toolbar-button click handler
   *  - dispatchMessage(message, options): chrome.runtime.sendMessage
   *  - dispatchCommand(command): resolves once the named command is received
   *
   * @param {Array} extensions - Array of extension metadata objects to update
   * @param {Object} target - Puppeteer target object
   * @returns {Promise<Object|null>} - Updated extension object, or null when the target is not a
   *   background extension target, is not in `extensions`, or its manifest cannot be read
   */
  async function loadExtensionFromTarget(extensions, target) {
    const info = await isTargetExtension(target);
    const { target_ctx, extension_id } = info;

    if (!info.target_is_bg || !extension_id || !target_ctx) {
      return null;
    }

    // Find matching extension in our list
    const extension = extensions.find((candidate) => candidate.id === extension_id);
    if (!extension) {
      console.warn(`[⚠️] Found loaded extension ${extension_id} that's not in CHROME_EXTENSIONS list`);
      return null;
    }

    // Load manifest from the extension context
    let manifest = null;
    try {
      manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
    } catch (err) {
      console.error(`[❌] Failed to read manifest for extension ${extension_id}:`, err);
      return null;
    }

    // The callbacks passed to evaluate() run inside the extension context, so they
    // must be self-contained and only use the arguments handed to them.

    // Trigger extension toolbar button click
    const dispatchAction = async (tab) => target_ctx.evaluate(async (tab) => {
      if (!tab) {
        tab = await new Promise((resolve) => {
          chrome.tabs.query({ currentWindow: true, active: true }, ([activeTab]) => resolve(activeTab));
        });
      }

      // Manifest V3: chrome.action
      if (chrome.action?.onClicked?.dispatch) {
        return await chrome.action.onClicked.dispatch(tab);
      }

      // Manifest V2: chrome.browserAction
      if (chrome.browserAction?.onClicked?.dispatch) {
        return await chrome.browserAction.onClicked.dispatch(tab);
      }

      throw new Error('Extension action dispatch not available');
    }, tab || null);

    // Send message to extension
    const dispatchMessage = async (message, options = {}) => target_ctx.evaluate(
      (msg, opts) => new Promise((resolve) => {
        chrome.runtime.sendMessage(msg, opts, (response) => resolve(response));
      }),
      message,
      options
    );

    // Trigger extension command (keyboard shortcut)
    const dispatchCommand = async (command) => target_ctx.evaluate(
      (cmd) => new Promise((resolve) => {
        // Only listens for the command; it settles when the command is actually
        // received. Triggering commands programmatically is not directly supported
        // and would need CDP or keyboard simulation.
        chrome.commands.onCommand.addListener((receivedCommand) => {
          if (receivedCommand === cmd) {
            resolve({ success: true, command: receivedCommand });
          }
        });
      }),
      command
    );

    const new_extension = {
      ...extension,
      target,
      target_type: info.target_type,
      target_url: info.target_url,
      manifest,
      manifest_version: info.manifest_version,
      dispatchAction,
      dispatchMessage,
      dispatchCommand,
    };

    // Update the extension in the array
    Object.assign(extension, new_extension);

    console.log(`[🔌] Connected to extension ${extension.name} (${extension.version})`);

    return new_extension;
  }
  /**
   * Ensure every extension in the list is installed.
   *
   * Extensions are processed one at a time, in list order, through
   * loadOrInstallExtension().
   *
   * @param {Array} extensions - Array of extension metadata objects
   * @param {string} [extensions_dir] - Directory to store extensions
   * @returns {Promise<Array>} - The `extensions` array that was passed in
   */
  async function installAllExtensions(extensions, extensions_dir = null) {
    console.log(`[⚙️] Installing ${extensions.length} chrome extensions...`);

    // Deliberately sequential: each install is awaited before the next one starts
    for (const pendingExtension of extensions) {
      await loadOrInstallExtension(pendingExtension, extensions_dir);
    }

    return extensions;
  }
  /**
   * Connect the known extensions to their targets in a running browser.
   *
   * Every current browser target is passed, one after another, to
   * loadExtensionFromTarget() together with the `extensions` list.
   *
   * @param {Object} browser - Puppeteer browser instance
   * @param {Array} extensions - Array of extension metadata objects
   * @returns {Promise<Array>} - The `extensions` array that was passed in
   */
  async function loadAllExtensionsFromBrowser(browser, extensions) {
    console.log(`[⚙️] Loading ${extensions.length} chrome extensions from browser...`);

    // Find loaded extensions at runtime by examining browser targets
    const currentTargets = browser.targets();
    for (const candidateTarget of currentTargets) {
      await loadExtensionFromTarget(extensions, candidateTarget);
    }

    return extensions;
  }
  /**
   * Read and parse the manifest.json of an unpacked extension.
   *
   * @param {string} unpacked_path - Path to unpacked extension directory
   * @returns {object|null} - Parsed manifest, or null when the file is missing,
   *   unreadable or not valid JSON
   */
  function loadExtensionManifest(unpacked_path) {
    const manifestFile = path.join(unpacked_path, 'manifest.json');

    try {
      // A missing file fails the read and lands in the catch below, just like invalid JSON
      return JSON.parse(fs.readFileSync(manifestFile, 'utf-8'));
    } catch (error) {
      return null;
    }
  }
  /**
   * @deprecated Use puppeteer's enableExtensions option instead.
   *
   * Build the Chrome command-line arguments that load a set of unpacked extensions.
   * NOTE: This is deprecated. Use puppeteer.launch({ pipe: true, enableExtensions: [paths] }) instead.
   *
   * Extensions without an unpacked_path are ignored. A deprecation warning is
   * printed on every call.
   *
   * @param {Array} extensions - Array of extension metadata objects
   * @returns {Array<string>} - Chrome CLI arguments for loading extensions ([] for an empty/missing list)
   */
  function getExtensionLaunchArgs(extensions) {
    console.warn('[DEPRECATED] getExtensionLaunchArgs is deprecated. Use puppeteer enableExtensions option instead.');

    if (!extensions || extensions.length === 0) {
      return [];
    }

    const loadPaths = [];
    const allowlistedIds = [];
    for (const ext of extensions) {
      if (!ext.unpacked_path) {
        continue;
      }
      loadPaths.push(ext.unpacked_path);
      // Allowlist the stored id, or the id computed from the unpacked path when none is stored
      allowlistedIds.push(ext.id || getExtensionId(ext.unpacked_path));
    }

    return [
      `--load-extension=${loadPaths.join(',')}`,
      `--allowlisted-extension-id=${allowlistedIds.join(',')}`,
      '--allow-legacy-extension-manifests',
      '--disable-extensions-auto-update',
    ];
  }
  /**
   * Collect the unpacked paths of a list of extensions, for use with puppeteer's
   * enableExtensions option (see https://pptr.dev/guides/chrome-extensions).
   *
   * @param {Array} extensions - Array of extension metadata objects
   * @returns {Array<string>} - unpacked_path of every extension that has one, in list order
   */
  function getExtensionPaths(extensions) {
    if (!extensions || extensions.length === 0) {
      return [];
    }

    // Entries without an unpacked_path contribute nothing
    return extensions.flatMap(({ unpacked_path }) => (unpacked_path ? [unpacked_path] : []));
  }
  /**
   * Wait for an extension target to be available in the browser and return its context.
   *
   * Lookup order:
   *  1. The extension's background context: a "service_worker" target (Manifest V3)
   *     or a "background_page" target (Manifest V2). Both kinds are awaited in a
   *     single wait, so a Manifest V2 extension is found as soon as its background
   *     page exists and does not first sit out a full service-worker timeout.
   *  2. Fallback: any target whose URL starts with chrome-extension://<extensionId>.
   *
   * Each of the two waits uses `timeout`, so the worst case is 2 × timeout.
   *
   * For Manifest V3 extensions (service workers):
   *   const worker = await waitForExtensionTarget(browser, extensionId);
   *   // worker is a WebWorker context
   *
   * For Manifest V2 extensions (background pages):
   *   const page = await waitForExtensionTarget(browser, extensionId);
   *   // page is a Page context
   *
   * @param {Object} browser - Puppeteer browser instance
   * @param {string} extensionId - Extension ID to wait for (computed from path hash)
   * @param {number} [timeout=30000] - Timeout in milliseconds for each wait
   * @returns {Promise<Object>} - Worker or Page context for the extension
   * @throws Whatever browser.waitForTarget rejects with when the fallback wait also fails
   */
  async function waitForExtensionTarget(browser, extensionId, timeout = 30000) {
    const extensionOrigin = `chrome-extension://${extensionId}`;

    // Service workers expose a worker context, every other target a page
    const contextOf = (target) =>
      (target.type() === 'service_worker' ? target.worker() : target.page());

    // Preferred: the background context (MV3 service worker or MV2 background page)
    try {
      const backgroundTarget = await browser.waitForTarget(
        (target) =>
          (target.type() === 'service_worker' || target.type() === 'background_page') &&
          target.url().includes(extensionOrigin),
        { timeout }
      );
      const backgroundContext = await contextOf(backgroundTarget);
      if (backgroundContext) return backgroundContext;
    } catch (err) {
      // No background context appeared in time, fall back to any extension target
    }

    // Try any extension page as fallback
    const extTarget = await browser.waitForTarget(
      (target) => target.url().startsWith(extensionOrigin),
      { timeout }
    );

    return await contextOf(extTarget);
  }
  /**
   * List the browser targets that look like extension targets.
   *
   * A target is included when its URL starts with "chrome-extension://" or when
   * its type is "service_worker" or "background_page". Workers/background pages
   * whose URL is not an extension URL are therefore included as well, with
   * extensionId set to null.
   *
   * @param {Object} browser - Puppeteer browser instance
   * @returns {Array<Object>} - Array of {type, url, extensionId} objects
   */
  function getExtensionTargets(browser) {
    const EXTENSION_SCHEME = 'chrome-extension://';
    const BACKGROUND_TYPES = ['service_worker', 'background_page'];

    const summaries = [];
    for (const target of browser.targets()) {
      const url = target.url();
      const type = target.type();

      const looksLikeExtension = url.startsWith(EXTENSION_SCHEME) || BACKGROUND_TYPES.includes(type);
      if (!looksLikeExtension) {
        continue;
      }

      // The ID is the host part that follows the scheme
      let extensionId = null;
      if (url.includes(EXTENSION_SCHEME)) {
        extensionId = url.split(EXTENSION_SCHEME)[1]?.split('/')[0];
      }

      summaries.push({ type, url, extensionId });
    }
    return summaries;
  }
  /**
   * Find a working Chromium binary.
   *
   * Search order:
   *  1. CHROME_BINARY, unless its resolved path contains "Google Chrome" or
   *     "google-chrome" (a warning is printed in that case).
   *  2. System Chromium locations.
   *  3. The puppeteer cache under $HOME/.cache/puppeteer, newest version directory first.
   *
   * A candidate is only accepted if the file exists and running it with
   * `--version` succeeds within 5 seconds.
   *
   * @returns {string|null} - Path to the browser binary or null if not found
   */
  function findChromium() {
    const { execFileSync } = require('child_process');

    // Helper to validate a binary by running --version.
    // The binary is executed directly (no shell), so unusual characters in the
    // path cannot be misinterpreted as shell syntax.
    const validateBinary = (binaryPath) => {
      if (!binaryPath || !fs.existsSync(binaryPath)) return false;
      try {
        execFileSync(binaryPath, ['--version'], { encoding: 'utf8', timeout: 5000, stdio: 'pipe' });
        return true;
      } catch (e) {
        return false;
      }
    };

    // 1. Check CHROME_BINARY env var first
    const chromeBinary = getEnv('CHROME_BINARY');
    if (chromeBinary) {
      const absPath = path.resolve(chromeBinary);
      if (absPath.includes('Google Chrome') || absPath.includes('google-chrome')) {
        console.error('[!] Warning: CHROME_BINARY points to Chrome. Chromium is required for extension support.');
      } else if (validateBinary(absPath)) {
        return absPath;
      }
      console.error(`[!] Warning: CHROME_BINARY="${chromeBinary}" is not valid`);
    }

    // 2. Warn that no CHROME_BINARY is configured, searching fallbacks
    if (!chromeBinary) {
      console.error('[!] Warning: CHROME_BINARY not set, searching system locations...');
    }

    // Helper to find Chromium in @puppeteer/browsers directory structure
    const findInPuppeteerDir = (baseDir) => {
      if (!fs.existsSync(baseDir)) return null;
      try {
        // Newest first. Compare numerically-aware so that e.g. "linux-1000000"
        // ranks above "linux-999999" (a plain string sort would pick the older one).
        const versions = fs.readdirSync(baseDir)
          .sort((a, b) => b.localeCompare(a, undefined, { numeric: true }));
        for (const version of versions) {
          const versionDir = path.join(baseDir, version);
          const candidates = [
            path.join(versionDir, 'chrome-mac-arm64/Chromium.app/Contents/MacOS/Chromium'),
            path.join(versionDir, 'chrome-mac/Chromium.app/Contents/MacOS/Chromium'),
            path.join(versionDir, 'chrome-mac-x64/Chromium.app/Contents/MacOS/Chromium'),
            path.join(versionDir, 'chrome-linux64/chrome'),
            path.join(versionDir, 'chrome-linux/chrome'),
          ];
          const existing = candidates.find((candidate) => fs.existsSync(candidate));
          if (existing) return existing;
        }
      } catch (e) {
        // Unreadable cache directory: treat as "nothing found"
      }
      return null;
    };

    // 3. Search fallback locations (Chromium only)
    const fallbackLocations = [
      // System Chromium
      '/Applications/Chromium.app/Contents/MacOS/Chromium',
      '/usr/bin/chromium',
      '/usr/bin/chromium-browser',
      // Puppeteer cache
      path.join(process.env.HOME || '', '.cache/puppeteer/chromium'),
      path.join(process.env.HOME || '', '.cache/puppeteer'),
    ];

    for (const loc of fallbackLocations) {
      // Check if it's a puppeteer cache dir
      if (loc.includes('.cache/puppeteer')) {
        const binary = findInPuppeteerDir(loc);
        if (binary && validateBinary(binary)) {
          return binary;
        }
      } else if (validateBinary(loc)) {
        return loc;
      }
    }

    return null;
  }
  /**
   * Find a Chromium binary (never Chrome/Brave/Edge).
   *
   * Thin wrapper around findChromium() that normalises any falsy result to null.
   *
   * @returns {string|null} - Whatever findChromium() found, or null
   */
  function findAnyChromiumBinary() {
    return findChromium() || null;
  }
  1231. // ============================================================================
  1232. // Shared Extension Installer Utilities
  1233. // ============================================================================
  /**
   * Get the extensions directory path.
   * Centralized path calculation used by extension installers and chrome launch.
   *
   * Resolution order:
   * 1. CHROME_EXTENSIONS_DIR, when set (returned unchanged)
   * 2. <DATA_DIR>/personas/<ACTIVE_PERSONA>/chrome_extensions, with DATA_DIR
   *    defaulting to '.' and ACTIVE_PERSONA to 'Default'
   *
   * The result is not resolved against the working directory, so it is only
   * absolute when the configured values are.
   *
   * @returns {string} - Path to extensions directory
   */
  function getExtensionsDir() {
    const explicitDir = getEnv('CHROME_EXTENSIONS_DIR');
    if (explicitDir) {
      return explicitDir;
    }

    return path.join(
      getEnv('DATA_DIR', '.'),
      'personas',
      getEnv('ACTIVE_PERSONA', 'Default'),
      'chrome_extensions'
    );
  }
  /**
   * Get machine type string for platform-specific paths.
   * Per the original note, this matches Python's archivebox.config.paths.get_machine_type().
   *
   * A non-empty MACHINE_TYPE environment variable is returned as-is. Otherwise the
   * result is "<arch>-<platform>" built from process.arch and process.platform,
   * with common architecture aliases normalised to arm64 / x86_64 / x86.
   *
   * @returns {string} - Machine type (e.g., 'x86_64-linux', 'arm64-darwin')
   */
  function getMachineType() {
    const override = process.env.MACHINE_TYPE;
    if (override) {
      return override;
    }

    // Architecture aliases -> canonical name; unknown architectures pass through unchanged
    const canonicalArch = new Map([
      ['arm64', 'arm64'],
      ['aarch64', 'arm64'],
      ['x64', 'x86_64'],
      ['x86_64', 'x86_64'],
      ['amd64', 'x86_64'],
      ['ia32', 'x86'],
      ['x86', 'x86'],
    ]);
    const machine = canonicalArch.get(process.arch) ?? process.arch;

    return `${machine}-${process.platform}`;
  }
  /**
   * Get LIB_DIR path for platform-specific binaries.
   *
   * Uses the LIB_DIR environment variable when set; otherwise
   * <DATA_DIR>/lib/<machine type>, with DATA_DIR defaulting to './data'.
   * The result is always resolved to an absolute path.
   *
   * @returns {string} - Absolute path to lib directory
   */
  function getLibDir() {
    const { LIB_DIR: explicitLibDir } = process.env;

    if (!explicitLibDir) {
      const dataDir = getEnv('DATA_DIR', './data');
      return path.resolve(path.join(dataDir, 'lib', getMachineType()));
    }

    return path.resolve(explicitLibDir);
  }
  /**
   * Get NODE_MODULES_DIR path for npm packages.
   *
   * Uses the NODE_MODULES_DIR environment variable when set; otherwise
   * <lib dir>/npm/node_modules. The result is always resolved to an absolute path.
   *
   * @returns {string} - Absolute path to node_modules directory
   */
  function getNodeModulesDir() {
    const configured = process.env.NODE_MODULES_DIR;
    const location = configured ? configured : path.join(getLibDir(), 'npm', 'node_modules');
    return path.resolve(location);
  }
  /**
   * Get all test environment paths as a JSON-serialisable object.
   * This is the single source of truth for path calculations - Python calls this
   * to avoid duplicating path logic.
   *
   * NODE_PATH is set to the same directory as NODE_MODULES_DIR because Node.js
   * uses NODE_PATH for module resolution.
   *
   * @returns {Object} - {DATA_DIR, MACHINE_TYPE, LIB_DIR, NODE_MODULES_DIR, NODE_PATH,
   *   NPM_BIN_DIR, CHROME_EXTENSIONS_DIR}
   */
  function getTestEnv() {
    const libDir = getLibDir();
    const nodeModulesDir = getNodeModulesDir();

    return {
      DATA_DIR: getEnv('DATA_DIR', './data'),
      MACHINE_TYPE: getMachineType(),
      LIB_DIR: libDir,
      NODE_MODULES_DIR: nodeModulesDir,
      NODE_PATH: nodeModulesDir,
      NPM_BIN_DIR: path.join(libDir, 'npm', '.bin'),
      CHROME_EXTENSIONS_DIR: getExtensionsDir(),
    };
  }
  /**
   * Install a Chrome extension with caching support.
   *
   * This is the main entry point for extension installer hooks:
   * - If "<extensionsDir>/<name>.extension.json" exists, parses, and the
   *   unpacked_path it records still contains a manifest.json, the cached
   *   metadata is returned without installing anything.
   * - Otherwise the extension is installed via loadOrInstallExtension() and its
   *   metadata is written to that cache file (a failed write only logs a warning).
   *
   * @param {Object} extension - Extension metadata object
   * @param {string} extension.webstore_id - Chrome Web Store extension ID
   * @param {string} extension.name - Human-readable extension name (used for cache file)
   * @param {Object} [options] - Options
   * @param {string} [options.extensionsDir] - Override extensions directory
   * @param {boolean} [options.quiet=false] - Suppress info logging (warnings/errors are still printed)
   * @returns {Promise<Object|null>} - Installed extension metadata or null on failure
   */
  async function installExtensionWithCache(extension, options = {}) {
    const { extensionsDir = getExtensionsDir(), quiet = false } = options;
    const info = (message) => {
      if (!quiet) {
        console.log(message);
      }
    };

    const cacheFile = path.join(extensionsDir, `${extension.name}.extension.json`);

    // Reuse cached metadata as long as the unpacked extension it points to still exists
    if (fs.existsSync(cacheFile)) {
      try {
        const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
        if (fs.existsSync(path.join(cached.unpacked_path, 'manifest.json'))) {
          info(`[*] ${extension.name} extension already installed (using cache)`);
          return cached;
        }
      } catch (e) {
        // Cache file corrupted, re-install
        console.warn(`[⚠️] Extension cache corrupted for ${extension.name}, re-installing...`);
      }
    }

    // Install extension
    info(`[*] Installing ${extension.name} extension...`);
    const installedExt = await loadOrInstallExtension(extension, extensionsDir);

    // A missing version means the install did not produce a readable manifest
    if (!installedExt?.version) {
      console.error(`[❌] Failed to install ${extension.name} extension`);
      return null;
    }

    // Write cache file
    try {
      await fs.promises.mkdir(extensionsDir, { recursive: true });
      await fs.promises.writeFile(cacheFile, JSON.stringify(installedExt, null, 2));
      info(`[+] Extension metadata written to ${cacheFile}`);
    } catch (e) {
      console.warn(`[⚠️] Failed to write cache file: ${e.message}`);
    }

    info(`[+] ${extension.name} extension installed`);
    return installedExt;
  }
  1382. // ============================================================================
  1383. // Snapshot Hook Utilities (for CDP-based plugins like ssl, responses, dns)
  1384. // ============================================================================
  /**
   * Parse command line arguments into an object.
   * Handles --key=value and --flag formats.
   *
   * - Only arguments starting with `--` are considered; everything else is ignored.
   * - Dashes in the key are converted to underscores (`--snapshot-id` -> `snapshot_id`).
   * - Everything after the first `=` is the value, so values may contain `=`.
   * - A flag without a value, or with an empty value (`--key=`), is stored as `true`.
   *
   * @param {string[]} [argv=process.argv.slice(2)] - Arguments to parse; defaults to
   *   the current process arguments, so existing zero-argument callers are unaffected
   * @returns {Object} - Parsed arguments object
   */
  function parseArgs(argv = process.argv.slice(2)) {
    const args = {};
    for (const arg of argv) {
      if (!arg.startsWith('--')) {
        continue;
      }
      // Split on the first '=' only; re-join the remainder so values keep their '='.
      const [key, ...valueParts] = arg.slice(2).split('=');
      args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
    }
    return args;
  }
  /**
   * Wait for Chrome session files to be ready.
   * Polls for cdp_url.txt and target_id.txt in the chrome session directory.
   *
   * The files are always checked at least once, even when `timeoutMs` is 0 or
   * negative, and are checked one final time after the last sleep so that files
   * appearing right at the deadline are not missed.
   *
   * @param {string} chromeSessionDir - Path to chrome session directory (e.g., '../chrome')
   * @param {number} [timeoutMs=60000] - Timeout in milliseconds
   * @returns {Promise<boolean>} - True if files are ready, false if timeout
   */
  async function waitForChromeSession(chromeSessionDir, timeoutMs = 60000) {
    const cdpFile = path.join(chromeSessionDir, 'cdp_url.txt');
    const targetIdFile = path.join(chromeSessionDir, 'target_id.txt');
    const pollIntervalMs = 100;
    const deadline = Date.now() + timeoutMs;

    for (;;) {
      if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
        return true;
      }
      const remainingMs = deadline - Date.now();
      // Written as !(x > 0) so a NaN timeout also terminates instead of looping forever.
      if (!(remainingMs > 0)) {
        return false;
      }
      // Never sleep past the deadline.
      await new Promise((resolve) => setTimeout(resolve, Math.min(pollIntervalMs, remainingMs)));
    }
  }
  /**
   * Read CDP WebSocket URL from chrome session directory.
   *
   * Reads `cdp_url.txt` directly instead of checking for existence first, so a
   * file that disappears between the check and the read yields `null` rather
   * than an exception.
   *
   * @param {string} chromeSessionDir - Path to chrome session directory
   * @returns {string|null} - Trimmed CDP URL, or null if the file does not exist
   * @throws {Error} - If the file exists but cannot be read
   */
  function readCdpUrl(chromeSessionDir) {
    const cdpFile = path.join(chromeSessionDir, 'cdp_url.txt');
    try {
      return fs.readFileSync(cdpFile, 'utf8').trim();
    } catch (err) {
      // "Not there" (including removed mid-read) is an expected absence.
      if (!fs.existsSync(cdpFile)) {
        return null;
      }
      // The file exists but is unreadable: surface the real error.
      throw err;
    }
  }
  /**
   * Read target ID from chrome session directory.
   *
   * Reads `target_id.txt` directly instead of checking for existence first, so
   * a file that disappears between the check and the read yields `null` rather
   * than an exception.
   *
   * @param {string} chromeSessionDir - Path to chrome session directory
   * @returns {string|null} - Trimmed target ID, or null if the file does not exist
   * @throws {Error} - If the file exists but cannot be read
   */
  function readTargetId(chromeSessionDir) {
    const targetIdFile = path.join(chromeSessionDir, 'target_id.txt');
    try {
      return fs.readFileSync(targetIdFile, 'utf8').trim();
    } catch (err) {
      // "Not there" (including removed mid-read) is an expected absence.
      if (!fs.existsSync(targetIdFile)) {
        return null;
      }
      // The file exists but is unreadable: surface the real error.
      throw err;
    }
  }
  /**
   * Connect to Chrome browser and find the target page.
   * This is a high-level utility that handles all the connection logic:
   * 1. Wait for chrome session files
   * 2. Connect to browser via CDP
   * 3. Find the target page by ID (falling back to the last open page)
   *
   * If anything fails after the CDP connection has been opened, the connection
   * is closed again before the error is rethrown, so a failed call does not
   * leave an open connection behind.
   *
   * @param {Object} options - Connection options
   * @param {string} [options.chromeSessionDir='../chrome'] - Path to chrome session directory
   * @param {number} [options.timeoutMs=60000] - Timeout for waiting
   * @param {Object} [options.puppeteer] - Puppeteer module (must be passed in)
   * @returns {Promise<Object>} - { browser, page, targetId, cdpUrl }
   * @throws {Error} - If connection fails or page not found
   */
  async function connectToPage(options = {}) {
    const {
      chromeSessionDir = '../chrome',
      timeoutMs = 60000,
      puppeteer,
    } = options;

    if (!puppeteer) {
      throw new Error('puppeteer module must be passed to connectToPage()');
    }

    // Wait for chrome session to be ready
    const sessionReady = await waitForChromeSession(chromeSessionDir, timeoutMs);
    if (!sessionReady) {
      throw new Error(CHROME_SESSION_REQUIRED_ERROR);
    }

    // Read session files
    const cdpUrl = readCdpUrl(chromeSessionDir);
    if (!cdpUrl) {
      throw new Error(CHROME_SESSION_REQUIRED_ERROR);
    }
    const targetId = readTargetId(chromeSessionDir);

    // Connect to browser
    const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });

    try {
      // Find the target page
      const pages = await browser.pages();
      let page = null;
      if (targetId) {
        page = pages.find((p) => {
          const target = p.target();
          // NOTE(review): _targetId is an underscore-prefixed (non-public) field — confirm
          // it still exists whenever the puppeteer dependency is upgraded.
          return target && target._targetId === targetId;
        });
      }

      // Fallback to last page if target not found
      if (!page) {
        page = pages[pages.length - 1];
      }
      if (!page) {
        throw new Error('No page found in browser');
      }

      return { browser, page, targetId, cdpUrl };
    } catch (err) {
      // The caller never receives `browser` on failure, so it cannot clean up itself.
      try {
        await browser.disconnect();
      } catch (disconnectErr) {
        // Deliberately ignored: the original failure is the one worth reporting.
      }
      throw err;
    }
  }
  /**
   * Block until the page-load marker exists, or fail after a timeout.
   * The marker is `page_loaded.txt` inside the chrome session directory
   * (written by chrome_navigate); it is polled every 100ms.
   *
   * The timeout is counted in poll steps rather than measured against the clock.
   *
   * @param {string} chromeSessionDir - Path to chrome session directory
   * @param {number} [timeoutMs=120000] - Timeout in milliseconds
   * @param {number} [postLoadDelayMs=0] - Additional delay after page load marker
   * @returns {Promise<void>}
   * @throws {Error} - If timeout waiting for navigation
   */
  async function waitForPageLoaded(chromeSessionDir, timeoutMs = 120000, postLoadDelayMs = 0) {
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const markerPath = path.join(chromeSessionDir, 'page_loaded.txt');
    const markerExists = () => fs.existsSync(markerPath);
    const stepMs = 100;

    for (let elapsedMs = 0; !markerExists() && elapsedMs < timeoutMs; elapsedMs += stepMs) {
      await sleep(stepMs);
    }

    // Re-check: the loop may have ended because the time budget ran out.
    if (!markerExists()) {
      throw new Error('Timeout waiting for navigation (chrome_navigate did not complete)');
    }

    // Optional settle time so late responses can still arrive.
    if (postLoadDelayMs > 0) {
      await sleep(postLoadDelayMs);
    }
  }
  // Export all functions
  // This object is the module's public CommonJS API. The names are grouped by
  // concern below; the CLI entry point at the bottom of this file calls a subset
  // of the same functions directly.
  module.exports = {
    // Environment helpers
    getEnv,
    getEnvBool,
    getEnvInt,
    getEnvArray,
    parseResolution,
    // PID file management
    writePidWithMtime,
    writeCmdScript,
    // Port management
    findFreePort,
    waitForDebugPort,
    // Zombie cleanup
    killZombieChrome,
    // Chrome launching
    launchChromium,
    killChrome,
    // Chromium install
    installChromium,
    installPuppeteerCore,
    // Chromium binary finding
    findChromium,
    findAnyChromiumBinary,
    // Extension utilities
    getExtensionId,
    loadExtensionManifest,
    installExtension,
    loadOrInstallExtension,
    isTargetExtension,
    loadExtensionFromTarget,
    installAllExtensions,
    loadAllExtensionsFromBrowser,
    // New puppeteer best-practices helpers
    getExtensionPaths,
    waitForExtensionTarget,
    getExtensionTargets,
    // Shared path utilities (single source of truth for Python/JS)
    getMachineType,
    getLibDir,
    getNodeModulesDir,
    getExtensionsDir,
    getTestEnv,
    // Shared extension installer utilities
    installExtensionWithCache,
    // Deprecated - use enableExtensions option instead
    getExtensionLaunchArgs,
    // Snapshot hook utilities (for CDP-based plugins)
    parseArgs,
    waitForChromeSession,
    readCdpUrl,
    readTargetId,
    connectToPage,
    waitForPageLoaded,
  };
  // CLI usage
  // When this file is executed directly (not require()'d), the first argument
  // selects a command and the remaining arguments are passed to it. Results go
  // to stdout; failures go to stderr with exit code 1.
  if (require.main === module) {
    const cliArgs = process.argv.slice(2);

    if (cliArgs.length === 0) {
      // Each entry is printed as its own line, exactly as listed.
      const usageLines = [
        'Usage: chrome_utils.js <command> [args...]',
        '',
        'Commands:',
        ' findChromium Find Chromium binary',
        ' installChromium Install Chromium via @puppeteer/browsers',
        ' installPuppeteerCore Install puppeteer-core npm package',
        ' launchChromium Launch Chrome with CDP debugging',
        ' killChrome <pid> Kill Chrome process by PID',
        ' killZombieChrome Clean up zombie Chrome processes',
        '',
        ' getMachineType Get machine type (e.g., x86_64-linux)',
        ' getLibDir Get LIB_DIR path',
        ' getNodeModulesDir Get NODE_MODULES_DIR path',
        ' getExtensionsDir Get Chrome extensions directory',
        ' getTestEnv Get all paths as JSON (for tests)',
        '',
        ' getExtensionId <path> Get extension ID from unpacked path',
        ' loadExtensionManifest Load extension manifest.json',
        ' loadOrInstallExtension Load or install an extension',
        ' installExtensionWithCache Install extension with caching',
        '',
        'Environment variables:',
        ' DATA_DIR Base data directory',
        ' LIB_DIR Library directory (computed if not set)',
        ' MACHINE_TYPE Machine type override',
        ' NODE_MODULES_DIR Node modules directory',
        ' CHROME_BINARY Chrome binary path',
        ' CHROME_EXTENSIONS_DIR Extensions directory',
      ];
      usageLines.forEach((line) => console.log(line));
      process.exit(1);
    }

    const [command, ...commandArgs] = cliArgs;

    // Print a message to stderr and terminate with a failure status.
    const fail = (message) => {
      console.error(message);
      process.exit(1);
    };

    // Shared shape for `{ success, error, ... }` results: print selected fields or fail.
    const reportResult = (result, pickFields) => {
      if (result.success) {
        console.log(JSON.stringify(pickFields(result)));
      } else {
        fail(result.error);
      }
    };

    // A Map (not a plain object) so inherited names such as "toString" are
    // still reported as unknown commands.
    const handlers = new Map([
      ['findChromium', () => {
        const binary = findChromium();
        if (binary) {
          console.log(binary);
        } else {
          fail('Chromium binary not found');
        }
      }],
      ['installChromium', async () => {
        const outcome = await installChromium();
        reportResult(outcome, (r) => ({ binary: r.binary, version: r.version }));
      }],
      ['installPuppeteerCore', async ([npmPrefix]) => {
        const outcome = await installPuppeteerCore({ npmPrefix: npmPrefix || undefined });
        reportResult(outcome, (r) => ({ path: r.path }));
      }],
      ['launchChromium', async ([outputDir, extensionPathsJson]) => {
        const extensionPaths = extensionPathsJson ? JSON.parse(extensionPathsJson) : [];
        const outcome = await launchChromium({
          outputDir: outputDir || 'chrome',
          extensionPaths,
        });
        reportResult(outcome, (r) => ({ cdpUrl: r.cdpUrl, pid: r.pid, port: r.port }));
      }],
      ['killChrome', async ([pidStr, outputDir]) => {
        const pid = Number.parseInt(pidStr, 10);
        if (Number.isNaN(pid)) {
          fail('Invalid PID');
        }
        await killChrome(pid, outputDir);
      }],
      ['killZombieChrome', ([dataDir]) => {
        console.log(killZombieChrome(dataDir));
      }],
      ['getExtensionId', ([unpackedPath]) => {
        console.log(getExtensionId(unpackedPath));
      }],
      ['loadExtensionManifest', ([unpackedPath]) => {
        console.log(JSON.stringify(loadExtensionManifest(unpackedPath)));
      }],
      ['getExtensionLaunchArgs', ([extensionsJson]) => {
        const extensions = JSON.parse(extensionsJson);
        console.log(JSON.stringify(getExtensionLaunchArgs(extensions)));
      }],
      ['loadOrInstallExtension', async ([webstore_id, name, extensionsDir]) => {
        const ext = await loadOrInstallExtension({ webstore_id, name }, extensionsDir);
        console.log(JSON.stringify(ext, null, 2));
      }],
      ['getMachineType', () => {
        console.log(getMachineType());
      }],
      ['getLibDir', () => {
        console.log(getLibDir());
      }],
      ['getNodeModulesDir', () => {
        console.log(getNodeModulesDir());
      }],
      ['getExtensionsDir', () => {
        console.log(getExtensionsDir());
      }],
      ['getTestEnv', () => {
        console.log(JSON.stringify(getTestEnv(), null, 2));
      }],
      ['installExtensionWithCache', async ([webstore_id, name]) => {
        if (!webstore_id || !name) {
          fail('Usage: installExtensionWithCache <webstore_id> <name>');
        }
        const ext = await installExtensionWithCache({ webstore_id, name });
        if (ext) {
          console.log(JSON.stringify(ext, null, 2));
        } else {
          process.exit(1);
        }
      }],
    ]);

    (async () => {
      try {
        const handler = handlers.get(command);
        if (handler === undefined) {
          fail(`Unknown command: ${command}`);
        } else {
          await handler(commandArgs);
        }
      } catch (error) {
        // Anything thrown by a command is reported uniformly.
        console.error(`Error: ${error.message}`);
        process.exit(1);
      }
    })();
  }