| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392 |
- {% extends "core/base.html" %}
- {% load static %}
- {% load i18n %}
- {# Breadcrumb trail: a link to the admin index, followed by the page title when the context supplies a non-empty title. #}
- {% block breadcrumbs %}
- <div class="breadcrumbs">
- <a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
- {% if title %} › {{ title }}{% endif %}
- </div>
- {% endblock %}
- {# Adds the static add.css stylesheet to the page via the extra_head block. #}
- {% block extra_head %}
- <link rel="stylesheet" href="{% static 'add.css' %}" />
- {% endblock %}
- {% block body %}
- <div style="max-width: 1440px; margin: auto; float: none">
- <br/><br/>
- {# Results branch: rendered only when the context variable stdout is non-empty; the else branch below renders the crawl form instead. #}
- {% if stdout %}
- <h1>Add new URLs to your archive: results</h1>
- {# NOTE(review): stdout goes through the safe filter, so it is NOT HTML-escaped by the template; confirm the view escapes or sanitizes it before it reaches this point. #}
- <pre id="stdout">
- {{ stdout | safe }}
- <br/><br/>
- </pre>
- <br/>
- <center>
- <a href="/add" id="submit"> Add more URLs ➕</a>
- </center>
- {% else %}
- {# Progress panel: hidden on load (display: none); the submit handler in the script further down switches it to display: block. #}
- <div id="in-progress" style="display: none;">
- <center><h3>Creating crawl and queueing snapshots...</h3>
- <p>Your crawl is being created. The orchestrator will process URLs and create snapshots in the background.</p>
- <br/>
- <div class="loader"></div>
- <br/>
- Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for detailed progress...
- </center>
- </div>
- {# Crawl creation form. It has no action attribute, so it POSTs back to the current URL; the CSRF token is rendered right after the opening tag. The form is closed after the submit button further down. #}
- <form id="add-form" method="POST" class="p-form">{% csrf_token %}
- <h1>Create a new Crawl</h1>
- <div class="crawl-explanation">
- <p>
- A <strong>Crawl</strong> is a job that processes URLs and creates <strong>Snapshots</strong> (archived copies) for each URL discovered.
- The settings below apply to the entire crawl and all snapshots it creates.
- </p>
- </div>
- <br/>
- <!-- Basic fields -->
- {# Each field below follows the same layout: label, widget, validation errors (only when present), then static help text. #}
- <div class="form-section">
- <h3>Crawl Settings</h3>
- <div class="form-field">
- {{ form.url.label_tag }}
- {{ form.url }}
- {# The text and class of #url-counter are rewritten by updateURLCount() in the script below; the script looks up the URL widget as textarea[name="url"]. #}
- <div id="url-counter" class="url-counter">0 URLs detected</div>
- {% if form.url.errors %}
- <div class="error">{{ form.url.errors }}</div>
- {% endif %}
- <div class="help-text">
- Enter URLs to archive, one per line. Examples:<br/>
- <code>https://example.com</code><br/>
- <code>https://news.ycombinator.com</code><br/>
- <code>https://github.com/ArchiveBox/ArchiveBox</code>
- </div>
- </div>
- <div class="form-field">
- {{ form.tag.label_tag }}
- {{ form.tag }}
- <!-- Tag autocomplete datalist -->
- {# One option per entry in the available_tags context variable. NOTE(review): presumably the tag widget points at this list via a list="tag-datalist" attribute set in the form definition; confirm there. #}
- <datalist id="tag-datalist">
- {% for tag_name in available_tags %}
- <option value="{{ tag_name }}">
- {% endfor %}
- </datalist>
- {% if form.tag.errors %}
- <div class="error">{{ form.tag.errors }}</div>
- {% endif %}
- <div class="help-text">Tags will be applied to all snapshots created by this crawl. Start typing to see existing tags.</div>
- </div>
- <div class="form-field">
- {{ form.depth.label_tag }}
- {{ form.depth }}
- {% if form.depth.errors %}
- <div class="error">{{ form.depth.errors }}</div>
- {% endif %}
- <div class="help-text">Controls how many links deep the crawl will follow from the starting URLs.</div>
- </div>
- <div class="form-field">
- {{ form.notes.label_tag }}
- {{ form.notes }}
- {% if form.notes.errors %}
- <div class="error">{{ form.notes.errors }}</div>
- {% endif %}
- <div class="help-text">Optional description for this crawl (visible in the admin interface).</div>
- </div>
- </div>
- <!-- Plugins section -->
- {# Plugin checkboxes are grouped by form field (chrome, archiving, parsing, search, binary, extension). The script below finds them through the .plugin-checkboxes wrappers. #}
- <div class="form-section">
- <h3>Crawl Plugins</h3>
- <p class="section-description">
- Select which archiving methods to run for all snapshots in this crawl. If none selected, all available plugins will be used.
- <a href="/admin/environment/plugins/" target="_blank">View plugin details →</a>
- </p>
- <!-- Plugin Presets -->
- {# The .preset-btn click handler in the script reads data-preset: select-all and clear-all act on every plugin checkbox, any other value is looked up in presetConfigs. #}
- <div class="plugin-presets">
- <span class="preset-label">Quick Select:</span>
- <button type="button" class="preset-btn" data-preset="quick-archive">📦 Quick Archive</button>
- <button type="button" class="preset-btn" data-preset="full-chrome">🌐 Full Chrome</button>
- <button type="button" class="preset-btn" data-preset="text-only">📄 Text Only</button>
- <button type="button" class="preset-btn" data-preset="select-all">✓ Select All</button>
- <button type="button" class="preset-btn" data-preset="clear-all">✗ Clear All</button>
- </div>
- <!-- Chrome-dependent plugins with "Select All" -->
- <div class="plugin-group">
- <div class="plugin-group-header">
- <label>Chrome-dependent plugins</label>
- {# The .select-all-btn handler appends "-plugins" to data-group to find the checkbox container by id, so data-group="chrome" must match id="chrome-plugins" below. #}
- <button type="button" class="select-all-btn" data-group="chrome">
- Select All Chrome
- </button>
- </div>
- <div class="plugin-checkboxes" id="chrome-plugins">
- {{ form.chrome_plugins }}
- </div>
- </div>
- <!-- Archiving plugins -->
- <div class="plugin-group">
- <div class="plugin-group-header">
- <label>Archiving</label>
- </div>
- <div class="plugin-checkboxes">
- {{ form.archiving_plugins }}
- </div>
- </div>
- <!-- Parsing plugins -->
- <div class="plugin-group">
- <div class="plugin-group-header">
- <label>Parsing</label>
- </div>
- <div class="plugin-checkboxes">
- {{ form.parsing_plugins }}
- </div>
- </div>
- <!-- Search plugins -->
- <div class="plugin-group">
- <div class="plugin-group-header">
- <label>Search</label>
- </div>
- <div class="plugin-checkboxes">
- {{ form.search_plugins }}
- </div>
- </div>
- <!-- Binary provider plugins -->
- <div class="plugin-group">
- <div class="plugin-group-header">
- <label>Binary Providers</label>
- </div>
- <div class="plugin-checkboxes">
- {{ form.binary_plugins }}
- </div>
- </div>
- <!-- Extension plugins -->
- <div class="plugin-group">
- <div class="plugin-group-header">
- <label>Browser Extensions</label>
- </div>
- <div class="plugin-checkboxes">
- {{ form.extension_plugins }}
- </div>
- </div>
- </div>
- <!-- Advanced options (collapsible) -->
- {# Rendered inside a details element without the open attribute, so the section starts collapsed. Fields: schedule, persona, overwrite, update, index_only, config. #}
- <div class="form-section">
- <details class="advanced-section">
- <summary><h3>Advanced Crawl Options</h3></summary>
- <p class="section-description">Additional settings that control how this crawl processes URLs and creates snapshots.</p>
- <div class="form-field">
- {{ form.schedule.label_tag }}
- {{ form.schedule }}
- {% if form.schedule.errors %}
- <div class="error">{{ form.schedule.errors }}</div>
- {% endif %}
- <div class="help-text">
- Optional: Schedule this crawl to repeat automatically. Examples:<br/>
- <code>daily</code> - Run once per day<br/>
- <code>weekly</code> - Run once per week<br/>
- <code>0 */6 * * *</code> - Every 6 hours (cron format)<br/>
- <code>0 0 * * 0</code> - Every Sunday at midnight (cron format)
- </div>
- </div>
- <div class="form-field">
- {{ form.persona.label_tag }}
- {{ form.persona }}
- {% if form.persona.errors %}
- <div class="error">{{ form.persona.errors }}</div>
- {% endif %}
- <div class="help-text">
- Authentication profile to use for all snapshots in this crawl.
- <a href="/admin/personas/persona/add/" target="_blank">Create new persona →</a>
- </div>
- </div>
- {# The three checkbox fields below render the widget before its label, unlike the fields above. #}
- <div class="form-field checkbox-field">
- {{ form.overwrite }}
- {{ form.overwrite.label_tag }}
- {% if form.overwrite.errors %}
- <div class="error">{{ form.overwrite.errors }}</div>
- {% endif %}
- <div class="help-text">Re-archive URLs even if they already exist</div>
- </div>
- <div class="form-field checkbox-field">
- {{ form.update }}
- {{ form.update.label_tag }}
- {% if form.update.errors %}
- <div class="error">{{ form.update.errors }}</div>
- {% endif %}
- <div class="help-text">Retry archiving URLs that previously failed</div>
- </div>
- <div class="form-field checkbox-field">
- {{ form.index_only }}
- {{ form.index_only.label_tag }}
- {% if form.index_only.errors %}
- <div class="error">{{ form.index_only.errors }}</div>
- {% endif %}
- <div class="help-text">Create snapshots but don't run archiving plugins yet (queue for later)</div>
- </div>
- <div class="form-field">
- {{ form.config.label_tag }}
- {{ form.config }}
- {% if form.config.errors %}
- <div class="error">{{ form.config.errors }}</div>
- {% endif %}
- <div class="help-text">
- Override any config option for this crawl (e.g., TIMEOUT, USER_AGENT, CHROME_BINARY, etc.)
- </div>
- </div>
- </details>
- </div>
- {# Submit control for #add-form. The button type is stated explicitly; "submit" is a button type, not an ARIA role. #}
- <center>
- <button type="submit" id="submit"> Create Crawl and Start Archiving ➕</button>
- </center>
- </form>
- <br/><br/><br/>
- {# Hidden on load; the submit handler in the script below sets this element to display: block. #}
- <center id="delay-warning" style="display: none">
- <small>(you will be redirected to your new Crawl page momentarily, it's safe to close this page at any time)</small>
- </center>
- {# The bookmarklet markup below sits inside an HTML comment: when absolute_add_path is set it is still sent to the browser (with the variable rendered) but is not displayed. #}
- {% if absolute_add_path %}
- <!-- <center id="bookmarklet">
- <p>Bookmark this link to quickly add to your archive:
- <a href="javascript:void(window.open('{{ absolute_add_path }}?url='+encodeURIComponent(document.location.href)));">Add to ArchiveBox</a></p>
- </center> -->
- {% endif %}
- <script>
- // URL Counter - detect URLs in textarea using regex
- const urlTextarea = document.querySelector('textarea[name="url"]');
- const urlCounter = document.getElementById('url-counter');
- function updateURLCount() {
- const text = urlTextarea.value;
- // Match http(s):// URLs
- const urlRegex = /https?:\/\/[^\s]+/gi;
- const matches = text.match(urlRegex) || [];
- const count = matches.length;
- urlCounter.textContent = `${count} URL${count !== 1 ? 's' : ''} detected`;
- urlCounter.className = count > 0 ? 'url-counter url-counter-positive' : 'url-counter';
- }
- urlTextarea.addEventListener('input', updateURLCount);
- updateURLCount(); // Initial count
- // Plugin Presets
- const presetConfigs = {
- 'quick-archive': ['screenshot', 'dom', 'favicon', 'wget', 'title'],
- 'full-chrome': ['chrome', 'screenshot', 'pdf', 'dom', 'singlefile', 'consolelog', 'redirects', 'responses', 'ssl', 'headers', 'title', 'accessibility', 'seo'],
- 'text-only': ['wget', 'readability', 'mercury', 'htmltotext', 'title', 'favicon']
- };
- document.querySelectorAll('.preset-btn').forEach(btn => {
- btn.addEventListener('click', function() {
- const preset = this.dataset.preset;
- const allCheckboxes = document.querySelectorAll('.plugin-checkboxes input[type="checkbox"]');
- if (preset === 'select-all') {
- allCheckboxes.forEach(cb => cb.checked = true);
- } else if (preset === 'clear-all') {
- allCheckboxes.forEach(cb => cb.checked = false);
- } else if (presetConfigs[preset]) {
- const pluginsToSelect = presetConfigs[preset];
- allCheckboxes.forEach(cb => {
- cb.checked = pluginsToSelect.includes(cb.value);
- });
- }
- // Save to localStorage after preset selection
- saveFormState();
- });
- });
- // Select All Chrome button handler
- document.querySelectorAll('.select-all-btn').forEach(btn => {
- btn.addEventListener('click', function() {
- const group = this.dataset.group;
- const container = document.getElementById(group + '-plugins');
- const checkboxes = container.querySelectorAll('input[type="checkbox"]');
- const allChecked = Array.from(checkboxes).every(cb => cb.checked);
- checkboxes.forEach(cb => {
- cb.checked = !allChecked;
- });
- this.textContent = allChecked ? 'Select All Chrome' : 'Deselect All Chrome';
- saveFormState();
- });
- });
- // LocalStorage: Save/Load form state (all fields including URLs for repeat crawls)
- const STORAGE_KEY = 'archivebox_add_form_state';
- function saveFormState() {
- const state = {};
- document.querySelectorAll('#add-form input, #add-form textarea, #add-form select').forEach(el => {
- if (el.name === 'csrfmiddlewaretoken') return;
- if (el.type === 'checkbox' || el.type === 'radio') {
- state[el.name + ':' + el.value] = el.checked;
- } else {
- state[el.name] = el.value;
- }
- });
- localStorage.setItem(STORAGE_KEY, JSON.stringify(state));
- }
- function loadFormState() {
- try {
- const state = JSON.parse(localStorage.getItem(STORAGE_KEY) || '{}');
- for (const [key, value] of Object.entries(state)) {
- if (key.includes(':')) {
- const [name, val] = key.split(':');
- const el = document.querySelector(`[name="${name}"][value="${val}"]`);
- if (el) el.checked = value;
- } else {
- const el = document.querySelector(`[name="${key}"]`);
- if (el && el.type !== 'checkbox' && el.type !== 'radio') el.value = value;
- }
- }
- updateURLCount(); // Update counter after loading URLs
- } catch (e) {}
- }
- // Auto-save on changes
- document.querySelectorAll('#add-form input, #add-form textarea, #add-form select').forEach(el => {
- el.addEventListener('change', saveFormState);
- });
- loadFormState();
- // Form submission handler
- document.getElementById('add-form').addEventListener('submit', function(event) {
- document.getElementById('in-progress').style.display = 'block'
- document.getElementById('add-form').style.display = 'none'
- document.getElementById('delay-warning').style.display = 'block'
- setTimeout(function() {
- window.location = '/'
- }, 2000)
- return true
- })
- </script>
- {% endif %}
- </div>
- {% endblock %}
- {# The footer and sidebar blocks are overridden with empty content. NOTE(review): presumably this hides the footer and sidebar from core/base.html on this page; confirm against that template. #}
- {% block footer %}{% endblock %}
- {% block sidebar %}{% endblock %}
|