singlefile_extension_save.js 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. #!/usr/bin/env node
  2. /**
  3. * Save a page using the SingleFile Chrome extension via an existing Chrome session.
  4. *
  5. * Usage: singlefile_extension_save.js --url=<url>
  6. * Output: prints saved file path on success
  7. */
  8. const fs = require('fs');
  9. const path = require('path');
  // Session directory handed to chromeUtils.connectToPage() as `chromeSessionDir`.
  // NOTE(review): relative path; how it is resolved is up to connectToPage — confirm.
  const CHROME_SESSION_DIR = '../chrome';

  // Download directory: an explicit CHROME_DOWNLOADS_DIR wins; otherwise it is
  // derived as <DATA_DIR or ./data>/personas/<ACTIVE_PERSONA or Default>/chrome_downloads.
  const DOWNLOADS_DIR = (() => {
    const { CHROME_DOWNLOADS_DIR, DATA_DIR, ACTIVE_PERSONA } = process.env;
    if (CHROME_DOWNLOADS_DIR) {
      return CHROME_DOWNLOADS_DIR;
    }
    return path.join(DATA_DIR || './data', 'personas', ACTIVE_PERSONA || 'Default', 'chrome_downloads');
  })();

  // Write the resolved value back to the environment so later readers of
  // CHROME_DOWNLOADS_DIR in this process see the same directory.
  process.env.CHROME_DOWNLOADS_DIR = DOWNLOADS_DIR;
  /**
   * Point Chrome downloads for the given page at `downloadDir`.
   *
   * Creates the directory if it does not exist, opens a CDP session on the
   * page's target and sends `Page.setDownloadBehavior`. If that command is
   * rejected, `Browser.setDownloadBehavior` is tried with the same parameters.
   *
   * Best-effort: any failure is logged to stderr and never thrown. When both
   * commands fail, the log line carries both error messages so the root cause
   * of the first failure is not lost.
   *
   * @param {object} page - Puppeteer page; must provide `target().createCDPSession()`.
   * @param {string} downloadDir - Directory downloads should be written to.
   * @returns {Promise<void>}
   */
  async function setDownloadDir(page, downloadDir) {
    try {
      await fs.promises.mkdir(downloadDir, { recursive: true });
      // NOTE(review): the session is left attached on purpose, as before;
      // detaching may reset the download override — confirm before changing.
      const client = await page.target().createCDPSession();
      const params = { behavior: 'allow', downloadPath: downloadDir };
      try {
        await client.send('Page.setDownloadBehavior', params);
      } catch (pageErr) {
        // Fallback for newer protocol versions
        try {
          await client.send('Browser.setDownloadBehavior', params);
        } catch (browserErr) {
          const browserMsg = browserErr?.message || browserErr;
          const pageMsg = pageErr?.message || pageErr;
          throw new Error(`${browserMsg} (Page.setDownloadBehavior also failed: ${pageMsg})`);
        }
      }
    } catch (err) {
      console.error(`[⚠️] Failed to set download directory: ${err.message || err}`);
    }
  }
  /**
   * Collect `--key=value` and `--flag` arguments from process.argv.
   *
   * Only tokens starting with `--` are considered. Dashes in the key become
   * underscores. Everything after the first `=` is the value; a missing or
   * empty value is stored as `true`.
   *
   * @returns {Object<string, string|boolean>} Parsed arguments keyed by name.
   */
  function parseArgs() {
    const parsed = {};
    for (const token of process.argv.slice(2)) {
      if (!token.startsWith('--')) {
        continue;
      }
      const body = token.slice(2);
      const eqAt = body.indexOf('=');
      const rawKey = eqAt === -1 ? body : body.slice(0, eqAt);
      const rawValue = eqAt === -1 ? '' : body.slice(eqAt + 1);
      parsed[rawKey.split('-').join('_')] = rawValue || true;
    }
    return parsed;
  }
  /**
   * Disconnect from the browser without letting a disconnect failure escape.
   *
   * A rejected disconnect must not change the exit code chosen by the caller
   * or replace the error the caller is about to rethrow, so it is only logged.
   *
   * @param {object} browser - Puppeteer browser handle.
   * @returns {Promise<void>}
   */
  async function disconnectQuietly(browser) {
    try {
      await browser.disconnect();
    } catch (err) {
      console.error(`[singlefile] failed to disconnect from chrome: ${err?.message || err}`);
    }
  }

  /**
   * Poll the browser's targets until one that looks like SingleFile appears.
   *
   * A target matches when chromeUtils.isTargetExtension() reports it as an
   * extension target with an extension id, and either its manifest name or its
   * URL contains the wanted name / "singlefile" / "single-file". While nothing
   * matches, a summary of the extension targets seen is logged at most every
   * five seconds and the target list is re-read every 500 ms.
   *
   * @param {object} browser - Puppeteer browser handle.
   * @param {object} chromeUtils - Project helper module providing isTargetExtension().
   * @param {string} wantedName - Lower-cased extension name to look for.
   * @param {number} timeoutMs - How long to keep polling.
   * @returns {Promise<{target: object, info: object}|null>} The match, or null on timeout.
   */
  async function waitForSinglefileTarget(browser, chromeUtils, wantedName, timeoutMs) {
    const deadline = Date.now() + timeoutMs;
    let lastLog = 0;
    while (Date.now() < deadline) {
      const targets = browser.targets();
      // Keep the info of every extension target from this pass so the progress
      // log below does not have to query each target a second time.
      const extensionInfos = [];
      for (const target of targets) {
        const info = await chromeUtils.isTargetExtension(target);
        if (!info?.target_is_extension) {
          continue;
        }
        extensionInfos.push(info);
        if (!info.extension_id) {
          continue;
        }
        const manifestName = (info.manifest_name || '').toLowerCase();
        const targetUrl = (info.target_url || '').toLowerCase();
        const nameMatches = manifestName.includes(wantedName) || manifestName.includes('singlefile') || manifestName.includes('single-file');
        const urlMatches = targetUrl.includes('singlefile') || targetUrl.includes('single-file');
        if (nameMatches || urlMatches) {
          return { target, info };
        }
      }
      if (Date.now() - lastLog > 5000) {
        const targetsSummary = extensionInfos.map((info) => ({
          type: info.target_type,
          url: info.target_url,
          extensionId: info.extension_id,
          manifestName: info.manifest_name,
        }));
        console.error(`[singlefile] waiting... targets total=${targets.length} extensions=${targetsSummary.length} details=${JSON.stringify(targetsSummary)}`);
        lastLog = Date.now();
      }
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
    return null;
  }

  /**
   * Entry point: save the page of the existing Chrome session with SingleFile.
   *
   * Progress and errors go to stderr; on success the saved file path is the
   * only thing printed to stdout. The `--url` value is required and logged,
   * but the page that gets saved is the one returned by
   * chromeUtils.connectToPage().
   *
   * Exit codes:
   *   0 - saved; path printed on stdout
   *   1 - missing `--url=<url>`
   *   2 - SingleFile extension not installed
   *   3 - extension produced no output file
   *   4 - unexpected error
   *   5 - extension target not found within 30 s
   *   6 - extension loaded but exposes no dispatchAction()
   *
   * @returns {Promise<void>} Normally never resolves: every path ends in process.exit().
   */
  async function main() {
    const args = parseArgs();
    const url = args.url;
    // parseArgs() stores `true` for a bare `--url` or an empty `--url=`, so a
    // truthiness check is not enough to know a URL was actually supplied.
    if (typeof url !== 'string') {
      console.error('Usage: singlefile_extension_save.js --url=<url>');
      process.exit(1);
    }

    console.error(`[singlefile] helper start url=${url}`);
    console.error(`[singlefile] downloads_dir=${DOWNLOADS_DIR}`);
    if (process.env.CHROME_EXTENSIONS_DIR) {
      console.error(`[singlefile] extensions_dir=${process.env.CHROME_EXTENSIONS_DIR}`);
    }

    try {
      console.error('[singlefile] loading dependencies...');
      const puppeteer = require('puppeteer-core');
      const chromeUtils = require('../chrome/chrome_utils.js');
      const {
        EXTENSION,
        saveSinglefileWithExtension,
      } = require('./on_Crawl__82_singlefile_install.js');
      console.error('[singlefile] dependencies loaded');

      // Ensure extension is installed and metadata is cached
      console.error('[singlefile] ensuring extension cache...');
      const extension = await chromeUtils.installExtensionWithCache(
        EXTENSION,
        { extensionsDir: process.env.CHROME_EXTENSIONS_DIR }
      );
      if (!extension) {
        console.error('[❌] SingleFile extension not installed');
        process.exit(2);
      }
      if (extension.unpacked_path) {
        const runtimeId = chromeUtils.getExtensionId(extension.unpacked_path);
        if (runtimeId) {
          extension.id = runtimeId;
        }
      }
      console.error(`[singlefile] extension ready id=${extension.id} version=${extension.version}`);

      // Connect to existing Chrome session
      console.error('[singlefile] connecting to chrome session...');
      const { browser, page } = await chromeUtils.connectToPage({
        chromeSessionDir: CHROME_SESSION_DIR,
        timeoutMs: 60000,
        puppeteer,
      });
      console.error('[singlefile] connected to chrome');

      try {
        // Ensure CDP target discovery is enabled so service_worker targets appear
        try {
          const client = await page.createCDPSession();
          await client.send('Target.setDiscoverTargets', { discover: true });
          await client.send('Target.setAutoAttach', { autoAttach: true, waitForDebuggerOnStart: false, flatten: true });
        } catch (err) {
          console.error(`[singlefile] failed to enable target discovery: ${err.message || err}`);
        }

        // Wait for extension target to be available, then attach dispatchAction
        console.error('[singlefile] waiting for extension target...');
        const wantedName = (extension.name || 'singlefile').toLowerCase();
        const match = await waitForSinglefileTarget(browser, chromeUtils, wantedName, 30000);
        if (!match) {
          const targets = chromeUtils.getExtensionTargets(browser);
          console.error(`[singlefile] extension target not found (name=${extension.name})`);
          console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
          await disconnectQuietly(browser);
          process.exit(5);
        }

        // Use the runtime extension id from the matched target
        extension.id = match.info.extension_id;
        console.error('[singlefile] loading extension from target...');
        await chromeUtils.loadExtensionFromTarget([extension], match.target);
        if (typeof extension.dispatchAction !== 'function') {
          const targets = chromeUtils.getExtensionTargets(browser);
          console.error(`[singlefile] extension dispatchAction missing for id=${extension.id}`);
          console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
          await disconnectQuietly(browser);
          process.exit(6);
        }

        console.error('[singlefile] setting download dir...');
        await setDownloadDir(page, DOWNLOADS_DIR);

        console.error('[singlefile] triggering save via extension...');
        const output = await saveSinglefileWithExtension(page, extension, { downloadsDir: DOWNLOADS_DIR });
        if (output && fs.existsSync(output)) {
          console.error(`[singlefile] saved: ${output}`);
          console.log(output);
          await disconnectQuietly(browser);
          process.exit(0);
        }

        console.error('[❌] SingleFile extension did not produce output');
        await disconnectQuietly(browser);
        process.exit(3);
      } catch (err) {
        // Release the connection, then let the outer handler report the
        // original error.
        await disconnectQuietly(browser);
        throw err;
      }
    } catch (err) {
      console.error(`[❌] ${err.message || err}`);
      process.exit(4);
    }
  }
  // Run only when this file is executed directly, not when it is require()d.
  if (require.main === module) {
    // main() reports its own failures and exits; this handler only covers an
    // error that escapes it, so the promise is never left unhandled.
    main().catch((err) => {
      console.error(`[❌] ${err?.message || err}`);
      process.exit(4);
    });
  }