| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- #!/usr/bin/env node
- /**
- * Save a page using the SingleFile Chrome extension via an existing Chrome session.
- *
- * Usage: singlefile_extension_save.js --url=<url>
- * Output: prints saved file path on success
- */
- const fs = require('fs');
- const path = require('path');
// Chrome session directory passed to chromeUtils.connectToPage() as `chromeSessionDir`.
const CHROME_SESSION_DIR = '../chrome';

// Directory that downloads are written to. An explicit CHROME_DOWNLOADS_DIR wins;
// otherwise the path is <DATA_DIR or ./data>/personas/<ACTIVE_PERSONA or Default>/chrome_downloads.
const DOWNLOADS_DIR = (() => {
  const {
    CHROME_DOWNLOADS_DIR: explicitDir,
    DATA_DIR: dataDir,
    ACTIVE_PERSONA: persona,
  } = process.env;
  if (explicitDir) {
    return explicitDir;
  }
  return path.join(dataDir || './data', 'personas', persona || 'Default', 'chrome_downloads');
})();

// Write the resolved directory back into the environment of this process.
process.env.CHROME_DOWNLOADS_DIR = DOWNLOADS_DIR;
/**
 * Best-effort: ask Chrome to save downloads into `downloadDir`.
 *
 * Creates the directory (recursively), opens a CDP session on the page's
 * target and sends `Page.setDownloadBehavior`. If that command fails,
 * `Browser.setDownloadBehavior` is sent with the same parameters instead.
 * Failures are caught and logged to stderr rather than propagated.
 *
 * @param {object} page - Page object exposing `target().createCDPSession()`.
 * @param {string} downloadDir - Directory downloads should be written to.
 * @returns {Promise<void>}
 */
async function setDownloadDir(page, downloadDir) {
  try {
    await fs.promises.mkdir(downloadDir, { recursive: true });
    const session = await page.target().createCDPSession();
    // Both CDP commands take the same parameters; only the method name differs.
    const allowDownloadsVia = (method) => session.send(method, {
      behavior: 'allow',
      downloadPath: downloadDir,
    });
    try {
      await allowDownloadsVia('Page.setDownloadBehavior');
    } catch {
      // Fallback for newer protocol versions
      await allowDownloadsVia('Browser.setDownloadBehavior');
    }
  } catch (failure) {
    console.error(`[⚠️] Failed to set download directory: ${failure.message || failure}`);
  }
}
/**
 * Parse `--key=value` style command-line arguments from `process.argv`.
 *
 * Only arguments starting with `--` are considered. Dashes in the key are
 * turned into underscores. Everything after the first `=` is the value; a
 * flag with no value (or an empty value) is stored as boolean `true`.
 *
 * @returns {Object<string, string|boolean>} Map of normalized option names to values.
 */
function parseArgs() {
  const parsed = {};
  for (const rawArg of process.argv.slice(2)) {
    if (!rawArg.startsWith('--')) {
      continue;
    }
    const body = rawArg.slice(2);
    const eqIndex = body.indexOf('=');
    const hasValue = eqIndex !== -1;
    const name = hasValue ? body.slice(0, eqIndex) : body;
    const value = hasValue ? body.slice(eqIndex + 1) : '';
    parsed[name.replaceAll('-', '_')] = value || true;
  }
  return parsed;
}
/**
 * Disconnect from the browser without letting a disconnect failure escape.
 *
 * A failed disconnect must not change the exit code of a run whose outcome is
 * already decided, nor replace the error that is being reported.
 *
 * @param {object} browser - Connected browser object exposing `disconnect()`.
 * @returns {Promise<void>}
 */
async function disconnectQuietly(browser) {
  try {
    await browser.disconnect();
  } catch (err) {
    console.error(`[singlefile] failed to disconnect from chrome: ${err?.message || err}`);
  }
}

/**
 * Poll the browser's targets until one belonging to the SingleFile extension appears.
 *
 * A target matches when `chromeUtils.isTargetExtension()` reports it as an
 * extension target with an extension id, and either its manifest name contains
 * `wantedName`/"singlefile"/"single-file" or its URL contains
 * "singlefile"/"single-file". While waiting, a summary of the extension
 * targets seen is logged to stderr at most once every 5 seconds.
 *
 * @param {object} browser - Connected browser object exposing `targets()`.
 * @param {object} chromeUtils - Helper module providing `isTargetExtension(target)`.
 * @param {string} wantedName - Lower-cased extension name to look for.
 * @param {number} [timeoutMs=30000] - How long to keep polling.
 * @returns {Promise<{target: object, info: object}|null>} The match, or null on timeout.
 */
async function findSinglefileTarget(browser, chromeUtils, wantedName, timeoutMs = 30000) {
  const deadline = Date.now() + timeoutMs;
  let lastLog = 0;
  while (Date.now() < deadline) {
    const targets = browser.targets();
    // Collected during the scan so the progress log does not have to query every target again.
    const extensionTargets = [];
    for (const target of targets) {
      const info = await chromeUtils.isTargetExtension(target);
      if (!info?.target_is_extension) {
        continue;
      }
      extensionTargets.push({
        type: info.target_type,
        url: info.target_url,
        extensionId: info.extension_id,
        manifestName: info.manifest_name,
      });
      if (!info.extension_id) {
        continue;
      }
      const manifestName = (info.manifest_name || '').toLowerCase();
      const targetUrl = (info.target_url || '').toLowerCase();
      const nameMatches = [wantedName, 'singlefile', 'single-file']
        .some((needle) => manifestName.includes(needle));
      const urlMatches = ['singlefile', 'single-file']
        .some((needle) => targetUrl.includes(needle));
      if (nameMatches || urlMatches) {
        return { target, info };
      }
    }
    if (Date.now() - lastLog > 5000) {
      console.error(`[singlefile] waiting... targets total=${targets.length} extensions=${extensionTargets.length} details=${JSON.stringify(extensionTargets)}`);
      lastLog = Date.now();
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  return null;
}

/**
 * Locate the extension target, attach to it and trigger the save.
 *
 * Does not disconnect from the browser and does not exit the process; the
 * caller owns both. On success the saved path is printed to stdout.
 *
 * @param {object} ctx
 * @param {object} ctx.browser - Connected browser object.
 * @param {object} ctx.page - Page to save.
 * @param {object} ctx.extension - Extension metadata; its `id` is overwritten with the runtime id.
 * @param {object} ctx.chromeUtils - Chrome helper module.
 * @param {Function} ctx.saveSinglefileWithExtension - Save routine from the install hook.
 * @returns {Promise<number>} Exit code: 0 saved, 3 no output, 5 target not found, 6 dispatchAction missing.
 */
async function runExtensionSave({ browser, page, extension, chromeUtils, saveSinglefileWithExtension }) {
  // Ensure CDP target discovery is enabled so service_worker targets appear
  try {
    const client = await page.createCDPSession();
    await client.send('Target.setDiscoverTargets', { discover: true });
    await client.send('Target.setAutoAttach', { autoAttach: true, waitForDebuggerOnStart: false, flatten: true });
  } catch (err) {
    console.error(`[singlefile] failed to enable target discovery: ${err.message || err}`);
  }

  // Wait for extension target to be available, then attach dispatchAction
  console.error('[singlefile] waiting for extension target...');
  const wantedName = (extension.name || 'singlefile').toLowerCase();
  const match = await findSinglefileTarget(browser, chromeUtils, wantedName);
  if (!match) {
    const targets = chromeUtils.getExtensionTargets(browser);
    console.error(`[singlefile] extension target not found (name=${extension.name})`);
    console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
    return 5;
  }

  // Use the runtime extension id from the matched target
  extension.id = match.info.extension_id;
  console.error('[singlefile] loading extension from target...');
  await chromeUtils.loadExtensionFromTarget([extension], match.target);
  if (typeof extension.dispatchAction !== 'function') {
    const targets = chromeUtils.getExtensionTargets(browser);
    console.error(`[singlefile] extension dispatchAction missing for id=${extension.id}`);
    console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
    return 6;
  }

  console.error('[singlefile] setting download dir...');
  await setDownloadDir(page, DOWNLOADS_DIR);
  console.error('[singlefile] triggering save via extension...');
  const output = await saveSinglefileWithExtension(page, extension, { downloadsDir: DOWNLOADS_DIR });
  if (output && fs.existsSync(output)) {
    console.error(`[singlefile] saved: ${output}`);
    console.log(output);
    return 0;
  }
  console.error('[❌] SingleFile extension did not produce output');
  return 3;
}

/**
 * CLI entry point: save the page of the existing Chrome session with the SingleFile extension.
 *
 * Progress goes to stderr; on success the saved file path is printed to stdout.
 * The process is always terminated via `process.exit` with one of:
 *   0 saved, 1 missing/invalid --url, 2 extension not installed,
 *   3 extension produced no output, 4 unexpected error,
 *   5 extension target not found, 6 extension dispatchAction missing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  // parseArgs() yields boolean `true` for a bare `--url` or an empty `--url=`,
  // so a truthiness check alone would accept a missing value.
  if (typeof url !== 'string' || url === '') {
    console.error('Usage: singlefile_extension_save.js --url=<url>');
    process.exit(1);
  }
  console.error(`[singlefile] helper start url=${url}`);
  console.error(`[singlefile] downloads_dir=${DOWNLOADS_DIR}`);
  if (process.env.CHROME_EXTENSIONS_DIR) {
    console.error(`[singlefile] extensions_dir=${process.env.CHROME_EXTENSIONS_DIR}`);
  }
  try {
    console.error('[singlefile] loading dependencies...');
    const puppeteer = require('puppeteer-core');
    const chromeUtils = require('../chrome/chrome_utils.js');
    const {
      EXTENSION,
      saveSinglefileWithExtension,
    } = require('./on_Crawl__82_singlefile_install.js');
    console.error('[singlefile] dependencies loaded');

    // Ensure extension is installed and metadata is cached
    console.error('[singlefile] ensuring extension cache...');
    const extension = await chromeUtils.installExtensionWithCache(
      EXTENSION,
      { extensionsDir: process.env.CHROME_EXTENSIONS_DIR }
    );
    if (!extension) {
      console.error('[❌] SingleFile extension not installed');
      process.exit(2);
    }
    if (extension.unpacked_path) {
      const runtimeId = chromeUtils.getExtensionId(extension.unpacked_path);
      if (runtimeId) {
        extension.id = runtimeId;
      }
    }
    console.error(`[singlefile] extension ready id=${extension.id} version=${extension.version}`);

    // Connect to existing Chrome session
    console.error('[singlefile] connecting to chrome session...');
    const { browser, page } = await chromeUtils.connectToPage({
      chromeSessionDir: CHROME_SESSION_DIR,
      timeoutMs: 60000,
      puppeteer,
    });
    console.error('[singlefile] connected to chrome');

    let exitCode;
    try {
      exitCode = await runExtensionSave({
        browser,
        page,
        extension,
        chromeUtils,
        saveSinglefileWithExtension,
      });
    } finally {
      // Runs on success, on a non-zero outcome and on a thrown error alike.
      await disconnectQuietly(browser);
    }
    process.exit(exitCode);
  } catch (err) {
    console.error(`[❌] ${err.message || err}`);
    process.exit(4);
  }
}
// Run only when this file is executed directly, not when it is loaded via require().
if (require.main === module) {
  // main() handles the errors raised inside its own try/catch; this handler is a
  // backstop so a rejection from outside it is reported instead of left unhandled.
  main().catch((err) => {
    console.error(`[❌] ${err?.message || err}`);
    process.exit(4);
  });
}
|