extract_cookies.js 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. #!/usr/bin/env node
  2. /**
  3. * Extract cookies from Chrome via CDP and write to Netscape cookies.txt format.
  4. *
  5. * This script launches Chrome with a given user data directory, connects via CDP,
  6. * extracts all cookies, and writes them to a cookies.txt file in Netscape format.
  7. *
  8. * Usage:
  9. * CHROME_USER_DATA_DIR=/path/to/profile COOKIES_OUTPUT_FILE=/path/to/cookies.txt node extract_cookies.js
  10. *
  11. * Environment variables:
  12. * CHROME_USER_DATA_DIR: Path to Chrome user data directory (required)
  13. * COOKIES_OUTPUT_FILE: Path to output cookies.txt file (required)
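  14. * CHROME_HEADLESS: Run in headless mode (default: true; NOTE: main() currently always passes headless: true to launchChromium and does not read this variable)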
  15. * NODE_MODULES_DIR: Path to node_modules for module resolution
  16. */
  17. // Add NODE_MODULES_DIR to module resolution paths if set
  18. if (process.env.NODE_MODULES_DIR) {
  19. module.paths.unshift(process.env.NODE_MODULES_DIR);
  20. }
  21. const fs = require('fs');
  22. const path = require('path');
  23. const {
  24. findAnyChromiumBinary,
  25. launchChromium,
  26. killChrome,
  27. getEnv,
  28. } = require('./chrome_utils.js');
  29. /**
  30. * Convert a cookie object to Netscape cookies.txt format line.
  31. *
  32. * Format: domain includeSubdomains path secure expiry name value
  33. *
  34. * @param {Object} cookie - CDP cookie object
  35. * @returns {string} - Netscape format cookie line
  36. */
  37. function cookieToNetscape(cookie) {
  38. // Domain: prefix with . for domain cookies (not host-only)
  39. let domain = cookie.domain;
  40. if (!domain.startsWith('.') && !cookie.hostOnly) {
  41. domain = '.' + domain;
  42. }
  43. // Include subdomains: TRUE if domain cookie (starts with .)
  44. const includeSubdomains = domain.startsWith('.') ? 'TRUE' : 'FALSE';
  45. // Path
  46. const cookiePath = cookie.path || '/';
  47. // Secure flag
  48. const secure = cookie.secure ? 'TRUE' : 'FALSE';
  49. // Expiry timestamp (0 for session cookies)
  50. let expiry = '0';
  51. if (cookie.expires && cookie.expires > 0) {
  52. // CDP returns expiry in seconds since epoch
  53. expiry = Math.floor(cookie.expires).toString();
  54. }
  55. // Name and value
  56. const name = cookie.name;
  57. const value = cookie.value;
  58. return `${domain}\t${includeSubdomains}\t${cookiePath}\t${secure}\t${expiry}\t${name}\t${value}`;
  59. }
  60. /**
  61. * Write cookies to Netscape cookies.txt format file.
  62. *
  63. * @param {Array} cookies - Array of CDP cookie objects
  64. * @param {string} outputPath - Path to output file
  65. */
  66. function writeCookiesFile(cookies, outputPath) {
  67. const lines = [
  68. '# Netscape HTTP Cookie File',
  69. '# https://curl.se/docs/http-cookies.html',
  70. '# This file was generated by ArchiveBox persona cookie extraction',
  71. '#',
  72. '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
  73. '',
  74. ];
  75. for (const cookie of cookies) {
  76. lines.push(cookieToNetscape(cookie));
  77. }
  78. fs.writeFileSync(outputPath, lines.join('\n') + '\n');
  79. }
  80. async function main() {
  81. const userDataDir = getEnv('CHROME_USER_DATA_DIR');
  82. const outputFile = getEnv('COOKIES_OUTPUT_FILE');
  83. if (!userDataDir) {
  84. console.error('ERROR: CHROME_USER_DATA_DIR environment variable is required');
  85. process.exit(1);
  86. }
  87. if (!outputFile) {
  88. console.error('ERROR: COOKIES_OUTPUT_FILE environment variable is required');
  89. process.exit(1);
  90. }
  91. if (!fs.existsSync(userDataDir)) {
  92. console.error(`ERROR: User data directory does not exist: ${userDataDir}`);
  93. process.exit(1);
  94. }
  95. const binary = findAnyChromiumBinary();
  96. if (!binary) {
  97. console.error('ERROR: Chromium-based browser binary not found');
  98. process.exit(1);
  99. }
  100. console.error(`[*] Extracting cookies from: ${userDataDir}`);
  101. console.error(`[*] Output file: ${outputFile}`);
  102. console.error(`[*] Using browser: ${binary}`);
  103. // Create a temporary output directory for Chrome files
  104. const outputDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'chrome-cookies-'));
  105. let chromePid = null;
  106. try {
  107. // Launch Chrome with the user data directory
  108. const result = await launchChromium({
  109. binary,
  110. outputDir,
  111. userDataDir,
  112. headless: true,
  113. killZombies: false, // Don't kill other Chrome instances
  114. });
  115. if (!result.success) {
  116. console.error(`ERROR: Failed to launch Chrome: ${result.error}`);
  117. process.exit(1);
  118. }
  119. chromePid = result.pid;
  120. const cdpUrl = result.cdpUrl;
  121. const port = result.port;
  122. console.error(`[*] Chrome launched (PID: ${chromePid})`);
  123. console.error(`[*] CDP URL: ${cdpUrl}`);
  124. // Connect to CDP and get cookies
  125. const http = require('http');
  126. // Use CDP directly via HTTP to get all cookies
  127. const getCookies = () => {
  128. return new Promise((resolve, reject) => {
  129. const req = http.request(
  130. {
  131. hostname: '127.0.0.1',
  132. port: port,
  133. path: '/json/list',
  134. method: 'GET',
  135. },
  136. (res) => {
  137. let data = '';
  138. res.on('data', (chunk) => (data += chunk));
  139. res.on('end', () => {
  140. try {
  141. const targets = JSON.parse(data);
  142. // Find a page target
  143. const pageTarget = targets.find(t => t.type === 'page') || targets[0];
  144. if (!pageTarget) {
  145. reject(new Error('No page target found'));
  146. return;
  147. }
  148. // Connect via WebSocket and send CDP command
  149. const WebSocket = require('ws');
  150. const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
  151. ws.on('open', () => {
  152. ws.send(JSON.stringify({
  153. id: 1,
  154. method: 'Network.getAllCookies',
  155. }));
  156. });
  157. ws.on('message', (message) => {
  158. const response = JSON.parse(message);
  159. if (response.id === 1) {
  160. ws.close();
  161. if (response.result && response.result.cookies) {
  162. resolve(response.result.cookies);
  163. } else {
  164. reject(new Error('Failed to get cookies: ' + JSON.stringify(response)));
  165. }
  166. }
  167. });
  168. ws.on('error', (err) => {
  169. reject(err);
  170. });
  171. } catch (e) {
  172. reject(e);
  173. }
  174. });
  175. }
  176. );
  177. req.on('error', reject);
  178. req.end();
  179. });
  180. };
  181. // Wait a moment for the browser to fully initialize
  182. await new Promise(r => setTimeout(r, 2000));
  183. console.error('[*] Fetching cookies via CDP...');
  184. const cookies = await getCookies();
  185. console.error(`[+] Retrieved ${cookies.length} cookies`);
  186. // Write cookies to file
  187. writeCookiesFile(cookies, outputFile);
  188. console.error(`[+] Wrote cookies to: ${outputFile}`);
  189. // Clean up
  190. await killChrome(chromePid, outputDir);
  191. chromePid = null;
  192. // Remove temp directory
  193. fs.rmSync(outputDir, { recursive: true, force: true });
  194. console.error('[+] Cookie extraction complete');
  195. process.exit(0);
  196. } catch (error) {
  197. console.error(`ERROR: ${error.message}`);
  198. // Clean up on error
  199. if (chromePid) {
  200. await killChrome(chromePid, outputDir);
  201. }
  202. try {
  203. fs.rmSync(outputDir, { recursive: true, force: true });
  204. } catch (e) {}
  205. process.exit(1);
  206. }
  207. }
  208. main().catch((e) => {
  209. console.error(`Fatal error: ${e.message}`);
  210. process.exit(1);
  211. });