add.html 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. {% extends "core/base.html" %}
  2. {% load static %}
  3. {% load i18n %}
  4. {% block breadcrumbs %}
  5. <div class="breadcrumbs">
  6. <a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
  7. {% if title %} &rsaquo; {{ title }}{% endif %}
  8. </div>
  9. {% endblock %}
  10. {% block extra_head %}
  11. <link rel="stylesheet" href="{% static 'add.css' %}" />
  12. {% endblock %}
  13. {% block body %}
  14. <div style="max-width: 1440px; margin: auto; float: none">
  15. <br/><br/>
  16. {% if stdout %}
  17. <h1>Add new URLs to your archive: results</h1>
  18. <pre id="stdout">
  19. {{ stdout | safe }}
  20. <br/><br/>
  21. </pre>
  22. <br/>
  23. <center>
  24. <a href="/add" id="submit">&nbsp; Add more URLs ➕</a>
  25. </center>
  26. {% else %}
  27. <div id="in-progress" style="display: none;">
  28. <center><h3>Creating crawl and queueing snapshots...</h3>
  29. <p>Your crawl is being created. The orchestrator will process URLs and create snapshots in the background.</p>
  30. <br/>
  31. <div class="loader"></div>
  32. <br/>
  33. Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for detailed progress...
  34. </center>
  35. </div>
  36. <form id="add-form" method="POST" class="p-form">{% csrf_token %}
  37. <h1>Create a new Crawl</h1>
  38. <div class="crawl-explanation">
  39. <p>
  40. A <strong>Crawl</strong> is a job that processes URLs and creates <strong>Snapshots</strong> (archived copies) for each URL discovered.
  41. The settings below apply to the entire crawl and all snapshots it creates.
  42. </p>
  43. </div>
  44. <br/>
  45. <!-- Basic fields -->
  46. <div class="form-section">
  47. <h3>Crawl Settings</h3>
  48. <div class="form-field">
  49. {{ form.url.label_tag }}
  50. {{ form.url }}
  51. <div id="url-counter" class="url-counter">0 URLs detected</div>
  52. {% if form.url.errors %}
  53. <div class="error">{{ form.url.errors }}</div>
  54. {% endif %}
  55. <div class="help-text">
  56. Enter URLs to archive, one per line. Examples:<br/>
  57. <code>https://example.com</code><br/>
  58. <code>https://news.ycombinator.com</code><br/>
  59. <code>https://github.com/ArchiveBox/ArchiveBox</code>
  60. </div>
  61. </div>
  62. <div class="form-field">
  63. {{ form.tag.label_tag }}
  64. {{ form.tag }}
  65. <!-- Tag autocomplete datalist -->
  66. <datalist id="tag-datalist">
  67. {% for tag_name in available_tags %}
  68. <option value="{{ tag_name }}">
  69. {% endfor %}
  70. </datalist>
  71. {% if form.tag.errors %}
  72. <div class="error">{{ form.tag.errors }}</div>
  73. {% endif %}
  74. <div class="help-text">Tags will be applied to all snapshots created by this crawl. Start typing to see existing tags.</div>
  75. </div>
  76. <div class="form-field">
  77. {{ form.depth.label_tag }}
  78. {{ form.depth }}
  79. {% if form.depth.errors %}
  80. <div class="error">{{ form.depth.errors }}</div>
  81. {% endif %}
  82. <div class="help-text">Controls how many links deep the crawl will follow from the starting URLs.</div>
  83. </div>
  84. <div class="form-field">
  85. {{ form.notes.label_tag }}
  86. {{ form.notes }}
  87. {% if form.notes.errors %}
  88. <div class="error">{{ form.notes.errors }}</div>
  89. {% endif %}
  90. <div class="help-text">Optional description for this crawl (visible in the admin interface).</div>
  91. </div>
  92. </div>
  93. <!-- Plugins section -->
  94. <div class="form-section">
  95. <h3>Crawl Plugins</h3>
  96. <p class="section-description">
  97. Select which archiving methods to run for all snapshots in this crawl. If none selected, all available plugins will be used.
  98. <a href="/admin/environment/plugins/" target="_blank">View plugin details →</a>
  99. </p>
  100. <!-- Plugin Presets -->
  101. <div class="plugin-presets">
  102. <span class="preset-label">Quick Select:</span>
  103. <button type="button" class="preset-btn" data-preset="quick-archive">📦 Quick Archive</button>
  104. <button type="button" class="preset-btn" data-preset="full-chrome">🌐 Full Chrome</button>
  105. <button type="button" class="preset-btn" data-preset="text-only">📄 Text Only</button>
  106. <button type="button" class="preset-btn" data-preset="select-all">✓ Select All</button>
  107. <button type="button" class="preset-btn" data-preset="clear-all">✗ Clear All</button>
  108. </div>
  109. <!-- Chrome-dependent plugins with "Select All" -->
  110. <div class="plugin-group">
  111. <div class="plugin-group-header">
  112. <label>Chrome-dependent plugins</label>
  113. <button type="button" class="select-all-btn" data-group="chrome">
  114. Select All Chrome
  115. </button>
  116. </div>
  117. <div class="plugin-checkboxes" id="chrome-plugins">
  118. {{ form.chrome_plugins }}
  119. </div>
  120. </div>
  121. <!-- Archiving plugins -->
  122. <div class="plugin-group">
  123. <div class="plugin-group-header">
  124. <label>Archiving</label>
  125. </div>
  126. <div class="plugin-checkboxes">
  127. {{ form.archiving_plugins }}
  128. </div>
  129. </div>
  130. <!-- Parsing plugins -->
  131. <div class="plugin-group">
  132. <div class="plugin-group-header">
  133. <label>Parsing</label>
  134. </div>
  135. <div class="plugin-checkboxes">
  136. {{ form.parsing_plugins }}
  137. </div>
  138. </div>
  139. <!-- Search plugins -->
  140. <div class="plugin-group">
  141. <div class="plugin-group-header">
  142. <label>Search</label>
  143. </div>
  144. <div class="plugin-checkboxes">
  145. {{ form.search_plugins }}
  146. </div>
  147. </div>
  148. <!-- Binary provider plugins -->
  149. <div class="plugin-group">
  150. <div class="plugin-group-header">
  151. <label>Binary Providers</label>
  152. </div>
  153. <div class="plugin-checkboxes">
  154. {{ form.binary_plugins }}
  155. </div>
  156. </div>
  157. <!-- Extension plugins -->
  158. <div class="plugin-group">
  159. <div class="plugin-group-header">
  160. <label>Browser Extensions</label>
  161. </div>
  162. <div class="plugin-checkboxes">
  163. {{ form.extension_plugins }}
  164. </div>
  165. </div>
  166. </div>
  167. <!-- Advanced options (collapsible) -->
  168. <div class="form-section">
  169. <details class="advanced-section">
  170. <summary><h3>Advanced Crawl Options</h3></summary>
  171. <p class="section-description">Additional settings that control how this crawl processes URLs and creates snapshots.</p>
  172. <div class="form-field">
  173. {{ form.schedule.label_tag }}
  174. {{ form.schedule }}
  175. {% if form.schedule.errors %}
  176. <div class="error">{{ form.schedule.errors }}</div>
  177. {% endif %}
  178. <div class="help-text">
  179. Optional: Schedule this crawl to repeat automatically. Examples:<br/>
  180. <code>daily</code> - Run once per day<br/>
  181. <code>weekly</code> - Run once per week<br/>
  182. <code>0 */6 * * *</code> - Every 6 hours (cron format)<br/>
  183. <code>0 0 * * 0</code> - Every Sunday at midnight (cron format)
  184. </div>
  185. </div>
  186. <div class="form-field">
  187. {{ form.persona.label_tag }}
  188. {{ form.persona }}
  189. {% if form.persona.errors %}
  190. <div class="error">{{ form.persona.errors }}</div>
  191. {% endif %}
  192. <div class="help-text">
  193. Authentication profile to use for all snapshots in this crawl.
  194. <a href="/admin/personas/persona/add/" target="_blank">Create new persona →</a>
  195. </div>
  196. </div>
  197. <div class="form-field checkbox-field">
  198. {{ form.overwrite }}
  199. {{ form.overwrite.label_tag }}
  200. {% if form.overwrite.errors %}
  201. <div class="error">{{ form.overwrite.errors }}</div>
  202. {% endif %}
  203. <div class="help-text">Re-archive URLs even if they already exist</div>
  204. </div>
  205. <div class="form-field checkbox-field">
  206. {{ form.update }}
  207. {{ form.update.label_tag }}
  208. {% if form.update.errors %}
  209. <div class="error">{{ form.update.errors }}</div>
  210. {% endif %}
  211. <div class="help-text">Retry archiving URLs that previously failed</div>
  212. </div>
  213. <div class="form-field checkbox-field">
  214. {{ form.index_only }}
  215. {{ form.index_only.label_tag }}
  216. {% if form.index_only.errors %}
  217. <div class="error">{{ form.index_only.errors }}</div>
  218. {% endif %}
  219. <div class="help-text">Create snapshots but don't run archiving plugins yet (queue for later)</div>
  220. </div>
  221. <div class="form-field">
  222. {{ form.config.label_tag }}
  223. {{ form.config }}
  224. {% if form.config.errors %}
  225. <div class="error">{{ form.config.errors }}</div>
  226. {% endif %}
  227. <div class="help-text">
  228. Override any config option for this crawl (e.g., TIMEOUT, USER_AGENT, CHROME_BINARY, etc.)
  229. </div>
  230. </div>
  231. </details>
  232. </div>
  <center>
    {# "submit" is not a valid ARIA role; the button's submit behaviour is declared with the type attribute instead. #}
    <button type="submit" id="submit">&nbsp; Create Crawl and Start Archiving ➕</button>
  </center>
  236. </form>
  237. <br/><br/><br/>
  238. <center id="delay-warning" style="display: none">
  239. <small>(you will be redirected to your new Crawl page momentarily, it's safe to close this page at any time)</small>
  240. </center>
  241. {% if absolute_add_path %}
  242. <!-- <center id="bookmarklet">
  243. <p>Bookmark this link to quickly add to your archive:
  244. <a href="javascript:void(window.open('{{ absolute_add_path }}?url='+encodeURIComponent(document.location.href)));">Add to ArchiveBox</a></p>
  245. </center> -->
  246. {% endif %}
  247. <script>
  // URL counter: shows how many http(s) URLs are currently typed into the URL field.
  const urlTextarea = document.querySelector('textarea[name="url"]');
  const urlCounter = document.getElementById('url-counter');

  /**
   * Recount the http(s):// URLs in the URL textarea and refresh the counter label.
   *
   * Sets the counter text to "<n> URL(s) detected" and adds the
   * `url-counter-positive` class when at least one URL is found.
   * Does nothing when the textarea or the counter element is absent, so a
   * differently rendered URL field cannot break the rest of this script.
   */
  function updateURLCount() {
    if (!urlTextarea || !urlCounter) return;

    // Any run of non-whitespace characters following http:// or https:// counts as one URL.
    const urlRegex = /https?:\/\/[^\s]+/gi;
    const matches = urlTextarea.value.match(urlRegex) || [];
    const count = matches.length;

    urlCounter.textContent = `${count} URL${count !== 1 ? 's' : ''} detected`;
    urlCounter.className = count > 0 ? 'url-counter url-counter-positive' : 'url-counter';
  }

  if (urlTextarea) {
    urlTextarea.addEventListener('input', updateURLCount);
  }
  updateURLCount(); // Initial count for any value already present in the field
  // Plugin presets: each named preset lists the checkbox values it should leave ticked.
  // "select-all" and "clear-all" are handled separately in the click handler below.
  const presetConfigs = {
    'quick-archive': ['screenshot', 'dom', 'favicon', 'wget', 'title'],
    'full-chrome': ['chrome', 'screenshot', 'pdf', 'dom', 'singlefile', 'consolelog', 'redirects', 'responses', 'ssl', 'headers', 'title', 'accessibility', 'seo'],
    'text-only': ['wget', 'readability', 'mercury', 'htmltotext', 'title', 'favicon']
  };

  // Wire every "Quick Select" button: clicking one rewrites the checked state of all
  // plugin checkboxes according to the button's data-preset, then persists the form.
  for (const presetButton of document.querySelectorAll('.preset-btn')) {
    presetButton.addEventListener('click', (event) => {
      const presetName = event.currentTarget.dataset.preset;
      const pluginBoxes = document.querySelectorAll('.plugin-checkboxes input[type="checkbox"]');

      // Decide, per checkbox, whether it should end up checked; null means "unknown preset, touch nothing".
      let shouldBeChecked = null;
      if (presetName === 'select-all') {
        shouldBeChecked = () => true;
      } else if (presetName === 'clear-all') {
        shouldBeChecked = () => false;
      } else if (presetConfigs[presetName]) {
        const wantedPlugins = presetConfigs[presetName];
        shouldBeChecked = (box) => wantedPlugins.includes(box.value);
      }

      if (shouldBeChecked !== null) {
        pluginBoxes.forEach((box) => {
          box.checked = shouldBeChecked(box);
        });
      }

      // Save explicitly after applying the preset (the auto-save wiring only reacts to 'change' events)
      saveFormState();
    });
  }
  // Group toggle buttons (currently only the Chrome group): a click checks every box in
  // the group, unless all of them are already checked, in which case it unchecks them.
  document.querySelectorAll('.select-all-btn').forEach((toggleButton) => {
    toggleButton.addEventListener('click', (event) => {
      const button = event.currentTarget;
      // data-group="chrome" maps to the container with id "chrome-plugins".
      const groupContainer = document.getElementById(`${button.dataset.group}-plugins`);
      const groupBoxes = Array.from(groupContainer.querySelectorAll('input[type="checkbox"]'));

      const everyBoxChecked = groupBoxes.every((box) => box.checked);
      const nextChecked = !everyBoxChecked;
      for (const box of groupBoxes) {
        box.checked = nextChecked;
      }

      // Label reflects what the next click will do.
      if (everyBoxChecked) {
        button.textContent = 'Select All Chrome';
      } else {
        button.textContent = 'Deselect All Chrome';
      }
      saveFormState();
    });
  });
  // LocalStorage: Save/Load form state (all fields including URLs for repeat crawls)
  const STORAGE_KEY = 'archivebox_add_form_state';

  /**
   * Snapshot every named control inside #add-form into localStorage under STORAGE_KEY.
   *
   * Stored shape (JSON object):
   *   - checkboxes / radios: "<name>:<value>" -> checked (boolean)
   *   - everything else:     "<name>"         -> current value (string)
   * The CSRF token and controls without a name are never stored.
   *
   * Persistence is best-effort: if storage is unavailable or full the failure is
   * logged and the form keeps working.
   */
  function saveFormState() {
    const state = {};
    document.querySelectorAll('#add-form input, #add-form textarea, #add-form select').forEach(el => {
      // Unnamed controls cannot be matched up again on load; the CSRF token must not be reused.
      if (!el.name || el.name === 'csrfmiddlewaretoken') return;
      if (el.type === 'checkbox' || el.type === 'radio') {
        state[el.name + ':' + el.value] = el.checked;
      } else {
        state[el.name] = el.value;
      }
    });
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(state));
    } catch (err) {
      // setItem can throw (e.g. quota exceeded, storage blocked); do not let that break click/change handlers.
      console.warn('Could not save add-form state to localStorage:', err);
    }
  }
  /**
   * Restore the controls inside #add-form from the JSON object stored under STORAGE_KEY.
   *
   * Expects the shape written by saveFormState():
   *   - "<name>:<value>" -> boolean, applied to the matching checkbox / radio
   *   - "<name>"         -> string, applied as the value of any other control
   * Controls with no saved entry are left untouched. The CSRF token is never restored.
   *
   * Walks the form's controls and looks each one up in the saved object, rather than
   * building CSS selectors from saved keys, so names or values containing ':' or quote
   * characters are handled correctly and nothing outside the form is modified.
   *
   * Restoration is best-effort: a missing, corrupt or inaccessible store is logged
   * and otherwise ignored.
   */
  function loadFormState() {
    try {
      const saved = JSON.parse(localStorage.getItem(STORAGE_KEY) || '{}');
      // Guard against a stored "null" or a non-object value.
      const state = (saved && typeof saved === 'object') ? saved : {};
      const hasSaved = (key) => Object.prototype.hasOwnProperty.call(state, key);

      document.querySelectorAll('#add-form input, #add-form textarea, #add-form select').forEach(el => {
        if (!el.name || el.name === 'csrfmiddlewaretoken') return;
        // File inputs reject programmatic non-empty values, so never try to restore them.
        if (el.type === 'file') return;

        if (el.type === 'checkbox' || el.type === 'radio') {
          const key = el.name + ':' + el.value;
          if (hasSaved(key)) el.checked = Boolean(state[key]);
        } else if (hasSaved(el.name)) {
          el.value = state[el.name];
        }
      });

      updateURLCount(); // Update counter after loading URLs
    } catch (err) {
      console.warn('Could not restore add-form state from localStorage:', err);
    }
  }
  // Auto-save: persist the whole form whenever any of its inputs, textareas or selects
  // fires a 'change' event.
  for (const field of document.querySelectorAll('#add-form input, #add-form textarea, #add-form select')) {
    field.addEventListener('change', saveFormState);
  }

  // Restore whatever was saved on a previous visit.
  loadFormState();
  // Form submission handler: swap the form for the in-progress notice, show the
  // delay warning, and navigate to '/' after 2 seconds.
  document.getElementById('add-form').addEventListener('submit', function () {
    // Element id -> CSS display value, applied in this order.
    const displayById = {
      'in-progress': 'block',
      'add-form': 'none',
      'delay-warning': 'block'
    };
    for (const [elementId, displayValue] of Object.entries(displayById)) {
      document.getElementById(elementId).style.display = displayValue;
    }

    setTimeout(() => {
      window.location = '/';
    }, 2000);

    // Kept from the original; the submission itself is not cancelled here.
    return true;
  });
  345. </script>
  346. {% endif %}
  347. </div>
  348. {% endblock %}
  349. {% block footer %}{% endblock %}
  350. {% block sidebar %}{% endblock %}