Browse Source

rename OUTPUT_DIR to DATA_DIR

Nick Sweeting 1 year ago
parent
commit
b913e6f426

+ 1 - 1
archivebox/__init__.py

@@ -16,7 +16,7 @@ if str(PACKAGE_DIR) not in sys.path:
 
 from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR   # noqa
 
-os.environ['OUTPUT_DIR'] = str(DATA_DIR)
+os.environ['ARCHIVEBOX_DATA_DIR'] = str(DATA_DIR)
 os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
 
 # print('INSTALLING MONKEY PATCHES')

+ 5 - 4
archivebox/cli/archivebox_add.py

@@ -8,10 +8,11 @@ import argparse
 
 from typing import List, Optional, IO
 
-from ..main import add
 from archivebox.misc.util import docstring
+from archivebox.config import DATA_DIR, ARCHIVING_CONFIG
+
+from ..main import add
 from ..parsers import PARSERS
-from ..config.legacy import OUTPUT_DIR, ONLY_NEW
 from ..logging_util import SmartFormatter, accept_stdin, stderr
 
 
@@ -32,7 +33,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--update', #'-u',
         action='store_true',
-        default=not ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
+        default=not ARCHIVING_CONFIG.ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
         help="Also retry previously skipped/failed links when adding new links",
     )
     parser.add_argument(
@@ -117,7 +118,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         init=command.init,
         extractors=command.extract,
         parser=command.parser,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=pwd or DATA_DIR,
     )
 
 

+ 4 - 3
archivebox/cli/archivebox_config.py

@@ -5,12 +5,13 @@ __command__ = 'archivebox config'
 
 import sys
 import argparse
+from pathlib import Path
 
 from typing import Optional, List, IO
 
-from ..main import config
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import config
 from ..logging_util import SmartFormatter, accept_stdin
 
 
@@ -56,7 +57,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         get=command.get,
         set=command.set,
         reset=command.reset,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
 
 

+ 4 - 4
archivebox/cli/archivebox_help.py

@@ -5,12 +5,12 @@ __command__ = 'archivebox help'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import help
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import help
 from ..logging_util import SmartFormatter, reject_stdin
 
 
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.parse_args(args or ())
     reject_stdin(__command__, stdin)
     
-    help(out_dir=pwd or OUTPUT_DIR)
+    help(out_dir=Path(pwd) if pwd else DATA_DIR)
 
 
 if __name__ == '__main__':

+ 2 - 2
archivebox/cli/archivebox_init.py

@@ -10,7 +10,7 @@ from typing import Optional, List, IO
 
 from ..main import init
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 
 
@@ -44,7 +44,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         force=command.force,
         quick=command.quick,
         setup=command.setup,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=pwd or DATA_DIR,
     )
     
 

+ 4 - 4
archivebox/cli/archivebox_list.py

@@ -5,12 +5,12 @@ __command__ = 'archivebox list'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import list_all
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import list_all
 from ..index import (
     LINK_FILTERS,
     get_indexed_folders,
@@ -131,7 +131,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         json=command.json,
         html=command.html,
         with_headers=command.with_headers,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
     raise SystemExit(not matching_folders)
 

+ 4 - 4
archivebox/cli/archivebox_manage.py

@@ -4,19 +4,19 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox manage'
 
 import sys
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import manage
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import manage
 
 
 @docstring(manage.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     manage(
         args=args,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
 
 

+ 3 - 3
archivebox/cli/archivebox_oneshot.py

@@ -9,10 +9,10 @@ import argparse
 from pathlib import Path
 from typing import List, Optional, IO
 
-from ..main import oneshot
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, accept_stdin, stderr
+from ..main import oneshot
 
 
 @docstring(oneshot.__doc__)
@@ -46,7 +46,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--out-dir',
         type=str,
-        default=OUTPUT_DIR,
+        default=DATA_DIR,
         help= "Path to save the single archive folder to, e.g. ./example.com_archive"
     )
     command = parser.parse_args(args or ())

+ 4 - 4
archivebox/cli/archivebox_remove.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox remove'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import remove
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, accept_stdin
+from ..main import remove
 
 
 @docstring(remove.__doc__)
@@ -74,7 +74,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         after=command.after,
         yes=command.yes,
         delete=command.delete,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
     
 

+ 4 - 4
archivebox/cli/archivebox_schedule.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox schedule'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import schedule
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import schedule
 
 
 @docstring(schedule.__doc__)
@@ -108,7 +108,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         overwrite=command.overwrite,
         update=command.update,
         import_path=command.import_path,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
 
 

+ 5 - 5
archivebox/cli/archivebox_server.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox server'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import server
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR, BIND_ADDR
+from archivebox.config import DATA_DIR, SERVER_CONFIG
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import server
 
 @docstring(server.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         'runserver_args',
         nargs='*',
         type=str,
-        default=[BIND_ADDR],
+        default=[SERVER_CONFIG.BIND_ADDR],
         help='Arguments to pass to Django runserver'
     )
     parser.add_argument(
@@ -68,7 +68,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         init=command.init,
         quick_init=command.quick_init,
         createsuperuser=command.createsuperuser,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
 
 

+ 4 - 4
archivebox/cli/archivebox_setup.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox setup'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import setup
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import setup
 
 
 @docstring(setup.__doc__)
@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 
     setup(
         # force=command.force,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
     
 

+ 4 - 4
archivebox/cli/archivebox_shell.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox shell'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import shell
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import shell
 
 
 @docstring(shell.__doc__)
@@ -26,7 +26,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     reject_stdin(__command__, stdin)
     
     shell(
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
     
 

+ 4 - 4
archivebox/cli/archivebox_status.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox status'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import status
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import status
 
 
 @docstring(status.__doc__)
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.parse_args(args or ())
     reject_stdin(__command__, stdin)
 
-    status(out_dir=pwd or OUTPUT_DIR)
+    status(out_dir=Path(pwd) if pwd else DATA_DIR)
 
 
 if __name__ == '__main__':

+ 4 - 4
archivebox/cli/archivebox_update.py

@@ -5,12 +5,11 @@ __command__ = 'archivebox update'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import List, Optional, IO
 
-from ..main import update
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..index import (
     LINK_FILTERS,
     get_indexed_folders,
@@ -25,6 +24,7 @@ from ..index import (
     get_unrecognized_folders,
 )
 from ..logging_util import SmartFormatter, accept_stdin
+from ..main import update
 
 
 @docstring(update.__doc__)
@@ -127,7 +127,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         status=command.status,
         after=command.after,
         before=command.before,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
         extractors=command.extract,
     )
     

+ 4 - 4
archivebox/cli/archivebox_version.py

@@ -5,13 +5,13 @@ __command__ = 'archivebox version'
 
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 
-from ..main import version
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import version
 
 
 @docstring(version.__doc__)
@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     
     version(
         quiet=command.quiet,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
 
 

+ 26 - 26
archivebox/cli/tests.py

@@ -15,7 +15,7 @@ TEST_CONFIG = {
     'USE_COLOR': 'False',
     'SHOW_PROGRESS': 'False',
 
-    'OUTPUT_DIR': 'data.tests',
+    'DATA_DIR': 'data.tests',
     
     'SAVE_ARCHIVE_DOT_ORG': 'False',
     'SAVE_TITLE': 'False',
@@ -27,12 +27,12 @@ TEST_CONFIG = {
     'USE_YOUTUBEDL': 'False',
 }
 
-OUTPUT_DIR = 'data.tests'
+DATA_DIR = 'data.tests'
 os.environ.update(TEST_CONFIG)
 
 from ..main import init
 from ..index import load_main_index
-from ..config.legacy import (
+from archivebox.config.constants import (
     SQL_INDEX_FILENAME,
     JSON_INDEX_FILENAME,
     HTML_INDEX_FILENAME,
@@ -101,22 +101,22 @@ def output_hidden(show_failing=True):
 
 class TestInit(unittest.TestCase):
     def setUp(self):
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        os.makedirs(DATA_DIR, exist_ok=True)
 
     def tearDown(self):
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        shutil.rmtree(DATA_DIR, ignore_errors=True)
 
     def test_basic_init(self):
         with output_hidden():
             archivebox_init.main([])
 
-        assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
-        assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
-        assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
-        assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
+        assert (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
+        assert (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
+        assert (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
+        assert len(load_main_index(out_dir=DATA_DIR)) == 0
 
     def test_conflicting_init(self):
-        with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
+        with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
             f.write('test')
 
         try:
@@ -126,11 +126,11 @@ class TestInit(unittest.TestCase):
         except SystemExit:
             pass
 
-        assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
-        assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
-        assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
+        assert not (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
+        assert not (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
+        assert not (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
         try:
-            load_main_index(out_dir=OUTPUT_DIR)
+            load_main_index(out_dir=DATA_DIR)
             assert False, 'load_main_index should raise an exception when no index is present'
         except Exception:
             pass
@@ -138,36 +138,36 @@ class TestInit(unittest.TestCase):
     def test_no_dirty_state(self):
         with output_hidden():
             init()
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        shutil.rmtree(DATA_DIR, ignore_errors=True)
         with output_hidden():
             init()
 
 
 class TestAdd(unittest.TestCase):
     def setUp(self):
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        os.makedirs(DATA_DIR, exist_ok=True)
         with output_hidden():
             init()
 
     def tearDown(self):
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        shutil.rmtree(DATA_DIR, ignore_errors=True)
 
     def test_add_arg_url(self):
         with output_hidden():
             archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
 
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 30
 
     def test_add_arg_file(self):
-        test_file = Path(OUTPUT_DIR) / 'test.txt'
+        test_file = Path(DATA_DIR) / 'test.txt'
         with open(test_file, 'w+', encoding='utf') as f:
             f.write(test_urls)
 
         with output_hidden():
             archivebox_add.main([test_file])
 
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 12
         os.remove(test_file)
 
@@ -175,40 +175,40 @@ class TestAdd(unittest.TestCase):
         with output_hidden():
             archivebox_add.main([], stdin=test_urls)
 
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 12
 
 
 class TestRemove(unittest.TestCase):
     def setUp(self):
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        os.makedirs(DATA_DIR, exist_ok=True)
         with output_hidden():
             init()
             archivebox_add.main([], stdin=test_urls)
 
     # def tearDown(self):
-        # shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        # shutil.rmtree(DATA_DIR, ignore_errors=True)
 
 
     def test_remove_exact(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
 
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 11
 
     def test_remove_regex(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])
 
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 4
 
     def test_remove_domain(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
 
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 10
 
     def test_remove_none(self):

+ 0 - 1
archivebox/config/config_stubs.py

@@ -36,7 +36,6 @@ class ConfigDict(BaseConfig, benedict, total=False):
     IN_DOCKER: bool
 
     PACKAGE_DIR: Path
-    OUTPUT_DIR: Path
     CONFIG_FILE: Path
     ONLY_NEW: bool
     TIMEOUT: int

+ 0 - 1
archivebox/config/constants.py

@@ -60,7 +60,6 @@ class ConstantsDict(Mapping):
     LIB_DIR_NAME: str = 'lib'
     TMP_DIR_NAME: str = 'tmp'
 
-    OUTPUT_DIR: Path                    = DATA_DIR
     ARCHIVE_DIR: Path                   = DATA_DIR / ARCHIVE_DIR_NAME
     SOURCES_DIR: Path                   = DATA_DIR / SOURCES_DIR_NAME
     PERSONAS_DIR: Path                  = DATA_DIR / PERSONAS_DIR_NAME

+ 7 - 6
archivebox/config/legacy.py

@@ -44,7 +44,7 @@ import django
 from django.db.backends.sqlite3.base import Database as sqlite3
 
 
-from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
+from .constants import CONSTANTS, TIMEZONE
 from .constants import *
 from .config_stubs import (
     ConfigValue,
@@ -57,8 +57,9 @@ from ..misc.logging import (
 )
 
 from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
-from ..plugins_auth.ldap.apps import LDAP_CONFIG
-from ..plugins_extractor.favicon.apps import FAVICON_CONFIG
+from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
+from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
+
 ANSI = SHELL_CONFIG.ANSI
 LDAP = LDAP_CONFIG.LDAP_ENABLED
 
@@ -331,7 +332,7 @@ def load_config_val(key: str,
 
 
 def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
-    """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
+    """load the ini-formatted config file from DATA_DIR/ArchiveBox.conf"""
 
     config_path = CONSTANTS.CONFIG_FILE
     if config_path.exists():
@@ -351,7 +352,7 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic
 
 
 def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
-    """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
+    """write the ini-formatted config file to DATA_DIR/ArchiveBox.conf"""
 
     from archivebox.misc.system import atomic_write
 
@@ -785,7 +786,7 @@ def bump_startup_progress_bar():
 
 def setup_django_minimal():
     # sys.path.append(str(CONSTANTS.PACKAGE_DIR))
-    # os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
+    # os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
     # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
     # django.setup()
     raise Exception('dont use this anymore')

+ 7 - 8
archivebox/core/admin.py

@@ -21,8 +21,7 @@ from django import forms
 from signal_webhooks.admin import WebhookAdmin
 from signal_webhooks.utils import get_webhook_model
 
-from archivebox.config import VERSION
-
+from archivebox.config import VERSION, DATA_DIR
 from archivebox.misc.util import htmldecode, urldecode
 
 from core.models import Snapshot, ArchiveResult, Tag
@@ -536,11 +535,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         links = [snapshot.as_link() for snapshot in queryset]
         if len(links) < 3:
             # run syncronously if there are only 1 or 2 links
-            archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
+            archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
             messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
         else:
             # otherwise run in a background worker
-            result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": CONFIG.OUTPUT_DIR})
+            result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
             messages.success(
                 request,
                 mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),
@@ -552,7 +551,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
     def update_snapshots(self, request, queryset):
         links = [snapshot.as_link() for snapshot in queryset]
 
-        result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR})
+        result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})
 
         messages.success(
             request,
@@ -581,7 +580,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
     def overwrite_snapshots(self, request, queryset):
         links = [snapshot.as_link() for snapshot in queryset]
 
-        result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR})
+        result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})
 
         messages.success(
             request,
@@ -592,7 +591,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         description="☠️ Delete"
     )
     def delete_snapshots(self, request, queryset):
-        remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
+        remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
         messages.success(
             request,
             mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
@@ -732,7 +731,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
         )
 
     def output_summary(self, result):
-        snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
+        snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
         output_str = format_html(
             '<pre style="display: inline-block">{}</pre><br/>',
             result.output,

+ 1 - 1
archivebox/index/__init__.py

@@ -243,7 +243,7 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i
     log_indexing_process_finished()
 
 @enforce_types
-def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]:
+def load_main_index(out_dir: Path | str=DATA_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
     from core.models import Snapshot
     try:

+ 11 - 14
archivebox/index/sql.py

@@ -8,18 +8,15 @@ from typing import List, Tuple, Iterator
 from django.db.models import QuerySet
 from django.db import transaction
 
-from .schema import Link
 from archivebox.misc.util import enforce_types, parse_date
-from ..config.legacy import (
-    OUTPUT_DIR,
-    TAG_SEPARATOR_PATTERN,
-)
+from archivebox.config import DATA_DIR, GENERAL_CONFIG
 
+from .schema import Link
 
 ### Main Links Index
 
 @enforce_types
-def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
+def parse_sql_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]:
     from core.models import Snapshot
 
     return (
@@ -28,7 +25,7 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
     )
 
 @enforce_types
-def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
+def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=DATA_DIR) -> None:
     if atomic:
         with transaction.atomic():
             return snapshots.delete()
@@ -44,7 +41,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
     info['created_by_id'] = created_by_id or get_or_create_system_user_pk()
 
     tag_list = list(dict.fromkeys(
-        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
+        tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')
     ))
     info.pop('tags')
 
@@ -95,7 +92,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
 
 
 @enforce_types
-def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
+def write_sql_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
     for link in links:
         # with transaction.atomic():
             # write_link_to_sql_index(link)
@@ -103,7 +100,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by
             
 
 @enforce_types
-def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
+def write_sql_link_details(link: Link, out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
     from core.models import Snapshot
 
     # with transaction.atomic():
@@ -120,7 +117,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
     snap.title = link.title
 
     tag_list = list(
-        {tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')}
+        {tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')}
         | set(snap.tags.values_list('name', flat=True))
     )
 
@@ -130,7 +127,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
 
 
 @enforce_types
-def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
+def list_migrations(out_dir: Path=DATA_DIR) -> List[Tuple[bool, str]]:
     from django.core.management import call_command
     out = StringIO()
     call_command("showmigrations", list=True, stdout=out)
@@ -146,7 +143,7 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
     return migrations
 
 @enforce_types
-def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
+def apply_migrations(out_dir: Path=DATA_DIR) -> List[str]:
     from django.core.management import call_command
     out1, out2 = StringIO(), StringIO()
     
@@ -160,6 +157,6 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
     ]
 
 @enforce_types
-def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
+def get_admins(out_dir: Path=DATA_DIR) -> List[str]:
     from django.contrib.auth.models import User
     return User.objects.filter(is_superuser=True)

+ 0 - 2
archivebox/misc/tests.py

@@ -13,7 +13,6 @@ IN_DOCKER=False
 IN_QEMU=False
 PUID=501
 PGID=20
-OUTPUT_DIR=/opt/archivebox/data
 CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
 ONLY_NEW=True
 TIMEOUT=60
@@ -173,7 +172,6 @@ IN_DOCKER = false
 IN_QEMU = false
 PUID = 501
 PGID = 20
-OUTPUT_DIR = "/opt/archivebox/data"
 CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
 ONLY_NEW = true
 TIMEOUT = 60

+ 11 - 17
archivebox/parsers/__init__.py

@@ -13,21 +13,16 @@ from typing import IO, Tuple, List, Optional
 from datetime import datetime, timezone
 from pathlib import Path 
 
+from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG
 from archivebox.misc.system import atomic_write
-from ..config.legacy import (
-    ANSI,
-    OUTPUT_DIR,
-    SOURCES_DIR_NAME,
-    TIMEOUT,
-    stderr,
-    hint,
-)
+from archivebox.misc.logging import stderr, hint
 from archivebox.misc.util import (
     basename,
     htmldecode,
     download_url,
     enforce_types,
 )
+
 from ..index.schema import Link
 from ..logging_util import TimedProgress, log_source_saved
 
@@ -38,7 +33,6 @@ from . import pocket_html
 from . import pinboard_rss
 from . import shaarli_rss
 from . import medium_rss
-
 from . import netscape_html
 from . import generic_rss
 from . import generic_json
@@ -79,7 +73,7 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
     parse a list of URLS without touching the filesystem
     """
 
-    timer = TimedProgress(TIMEOUT * 4)
+    timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
     #urls = list(map(lambda x: x + "\n", urls))
     file = StringIO()
     file.writelines(urls)
@@ -98,7 +92,7 @@ def parse_links(source_file: str, root_url: Optional[str]=None, parser: str="aut
        RSS feed, bookmarks export, or text file
     """
 
-    timer = TimedProgress(TIMEOUT * 4)
+    timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
     with open(source_file, 'r', encoding='utf-8') as file:
         links, parser = run_parser_functions(file, timer, root_url=root_url, parser=parser)
 
@@ -148,9 +142,9 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
 
 
 @enforce_types
-def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
+def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=DATA_DIR) -> str:
     ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
-    source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
+    source_path = str(CONSTANTS.SOURCES_DIR / filename.format(ts=ts))
 
     referenced_texts = ''
 
@@ -167,10 +161,10 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
 
 
 @enforce_types
-def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
+def save_file_as_source(path: str, timeout: int=ARCHIVING_CONFIG.TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=DATA_DIR) -> str:
     """download a given url's content into output/sources/domain-<timestamp>.txt"""
     ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
-    source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
+    source_path = str(CONSTANTS.SOURCES_DIR / filename.format(basename=basename(path), ts=ts))
 
     if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         # Source is a URL that needs to be downloaded
@@ -183,9 +177,9 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
         except Exception as e:
             timer.end()
             print('{}[!] Failed to download {}{}\n'.format(
-                ANSI['red'],
+                SHELL_CONFIG.ANSI['red'],
                 path,
-                ANSI['reset'],
+                SHELL_CONFIG.ANSI['reset'],
             ))
             print('    ', e)
             raise e

+ 4 - 2
archivebox/queues/semaphores.py

@@ -1,10 +1,11 @@
-import time
 import uuid
 from functools import wraps
 from django.db import connection, transaction
 from django.utils import timezone
 from huey.exceptions import TaskLockedException
 
+from archivebox.config import CONSTANTS
+
 class SqliteSemaphore:
     def __init__(self, db_path, table_name, name, value=1, timeout=None):
         self.db_path = db_path
@@ -68,7 +69,8 @@ class SqliteSemaphore:
         return cursor.rowcount > 0
 
 
-LOCKS_DB_PATH = settings.CONFIG.OUTPUT_DIR / 'locks.sqlite3'
+LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
+
 
 def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
     """

+ 0 - 1
archivebox/queues/settings.py

@@ -2,7 +2,6 @@ from pathlib import Path
 
 from archivebox.config import DATA_DIR, CONSTANTS
 
-OUTPUT_DIR = DATA_DIR
 LOGS_DIR = CONSTANTS.LOGS_DIR
 TMP_DIR = CONSTANTS.TMP_DIR
 

+ 1 - 1
etc/uwsgi.ini

@@ -2,7 +2,7 @@
 socket = 127.0.0.1:3031
 chdir = ../
 http = 0.0.0.0:8001
-env = OUTPUT_DIR=./data
+env = DATA_DIR=./data
 wsgi-file = archivebox/core/wsgi.py
 processes = 4
 threads = 1