소스 검색

move util.py into misc folder

Nick Sweeting 1년 전
부모
커밋
363a499289
68개의 파일이 변경되었으며, 136개의 줄이 추가되고 161개의 줄이 삭제됨
  1. 1 1
      archivebox/abid_utils/abid.py
  2. 1 1
      archivebox/abid_utils/admin.py
  3. 2 2
      archivebox/abx/archivebox/base_queue.py
  4. 1 1
      archivebox/api/v1_cli.py
  5. 1 1
      archivebox/cli/archivebox_add.py
  6. 1 1
      archivebox/cli/archivebox_config.py
  7. 1 1
      archivebox/cli/archivebox_help.py
  8. 1 1
      archivebox/cli/archivebox_init.py
  9. 1 1
      archivebox/cli/archivebox_list.py
  10. 1 1
      archivebox/cli/archivebox_manage.py
  11. 1 1
      archivebox/cli/archivebox_oneshot.py
  12. 1 1
      archivebox/cli/archivebox_remove.py
  13. 1 1
      archivebox/cli/archivebox_schedule.py
  14. 1 1
      archivebox/cli/archivebox_server.py
  15. 1 1
      archivebox/cli/archivebox_setup.py
  16. 1 1
      archivebox/cli/archivebox_shell.py
  17. 1 1
      archivebox/cli/archivebox_status.py
  18. 1 1
      archivebox/cli/archivebox_update.py
  19. 1 1
      archivebox/cli/archivebox_version.py
  20. 1 6
      archivebox/config/config_stubs.py
  21. 2 2
      archivebox/config/constants.py
  22. 1 1
      archivebox/core/admin.py
  23. 1 1
      archivebox/core/forms.py
  24. 2 2
      archivebox/core/models.py
  25. 1 1
      archivebox/core/views.py
  26. 1 1
      archivebox/extractors/__init__.py
  27. 1 1
      archivebox/extractors/archive_org.py
  28. 1 1
      archivebox/extractors/dom.py
  29. 1 1
      archivebox/extractors/favicon.py
  30. 1 1
      archivebox/extractors/git.py
  31. 1 1
      archivebox/extractors/headers.py
  32. 1 1
      archivebox/extractors/htmltotext.py
  33. 1 1
      archivebox/extractors/media.py
  34. 1 1
      archivebox/extractors/mercury.py
  35. 1 1
      archivebox/extractors/pdf.py
  36. 1 1
      archivebox/extractors/readability.py
  37. 1 1
      archivebox/extractors/screenshot.py
  38. 1 1
      archivebox/extractors/singlefile.py
  39. 1 1
      archivebox/extractors/title.py
  40. 1 1
      archivebox/extractors/wget.py
  41. 1 1
      archivebox/index/__init__.py
  42. 1 1
      archivebox/index/csv.py
  43. 1 1
      archivebox/index/html.py
  44. 1 1
      archivebox/index/json.py
  45. 13 13
      archivebox/index/schema.py
  46. 1 1
      archivebox/index/sql.py
  47. 2 2
      archivebox/logging_util.py
  48. 8 10
      archivebox/main.py
  49. 3 3
      archivebox/misc/system.py
  50. 0 16
      archivebox/misc/util.py
  51. 1 1
      archivebox/parsers/__init__.py
  52. 1 1
      archivebox/parsers/generic_html.py
  53. 1 1
      archivebox/parsers/generic_json.py
  54. 1 1
      archivebox/parsers/generic_jsonl.py
  55. 1 1
      archivebox/parsers/generic_rss.py
  56. 1 1
      archivebox/parsers/generic_txt.py
  57. 1 1
      archivebox/parsers/medium_rss.py
  58. 1 1
      archivebox/parsers/netscape_html.py
  59. 1 1
      archivebox/parsers/pinboard_rss.py
  60. 1 1
      archivebox/parsers/pocket_api.py
  61. 1 1
      archivebox/parsers/pocket_html.py
  62. 1 1
      archivebox/parsers/readwise_reader_api.py
  63. 1 1
      archivebox/parsers/shaarli_rss.py
  64. 1 1
      archivebox/parsers/url_list.py
  65. 1 1
      archivebox/parsers/wallabag_atom.py
  66. 2 2
      archivebox/plugins_extractor/chrome/apps.py
  67. 45 2
      archivebox/search/__init__.py
  68. 0 45
      archivebox/search/utils.py

+ 1 - 1
archivebox/abid_utils/abid.py

@@ -11,7 +11,7 @@ from uuid import UUID
 from typeid import TypeID            # type: ignore[import-untyped]
 from datetime import datetime
 
-from ..util import enforce_types
+from archivebox.misc.util import enforce_types
 
 
 ABID_PREFIX_LEN = 4

+ 1 - 1
archivebox/abid_utils/admin.py

@@ -13,7 +13,7 @@ from django_object_actions import DjangoObjectActions, action
 
 from api.auth import get_or_create_api_token
 
-from ..util import parse_date
+from archivebox.misc.util import parse_date
 from .abid import ABID
 
 def highlight_diff(display_val: Any, compare_val: Any, invert: bool=False, color_same: str | None=None, color_diff: str | None=None):

+ 2 - 2
archivebox/abx/archivebox/base_queue.py

@@ -25,7 +25,7 @@ class BaseQueue(BaseHook):
 
     @property
     def tasks(self) -> Dict[str, 'TaskWrapper']:
-        """Return an AttrDict of all the background worker tasks defined in the plugin's tasks.py file."""
+        """Return an dict of all the background worker tasks defined in the plugin's tasks.py file."""
         tasks = importlib.import_module(f"{self.plugin_module}.tasks")
 
         all_tasks = {}
@@ -83,7 +83,7 @@ class BaseQueue(BaseHook):
         worker = start_worker(supervisor, self.get_supervisord_config(settings), lazy=lazy)
 
         # Update settings.WORKERS to include this worker
-        settings.WORKERS = getattr(settings, "WORKERS", None) or AttrDict({})
+        settings.WORKERS = getattr(settings, "WORKERS", None) or benedict({})
         settings.WORKERS[self.id] = self.start_supervisord_worker(settings, lazy=True)
 
         return worker

+ 1 - 1
archivebox/api/v1_cli.py

@@ -12,7 +12,7 @@ from ..main import (
     list_all,
     schedule,
 )
-from ..util import ansi_to_html
+from archivebox.misc.util import ansi_to_html
 from ..config.legacy import ONLY_NEW
 
 

+ 1 - 1
archivebox/cli/archivebox_add.py

@@ -9,7 +9,7 @@ import argparse
 from typing import List, Optional, IO
 
 from ..main import add
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..parsers import PARSERS
 from ..config.legacy import OUTPUT_DIR, ONLY_NEW
 from ..logging_util import SmartFormatter, accept_stdin, stderr

+ 1 - 1
archivebox/cli/archivebox_config.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import config
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, accept_stdin
 

+ 1 - 1
archivebox/cli/archivebox_help.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import help
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_init.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import init
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_list.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import list_all
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..index import (
     LINK_FILTERS,

+ 1 - 1
archivebox/cli/archivebox_manage.py

@@ -8,7 +8,7 @@ import sys
 from typing import Optional, List, IO
 
 from ..main import manage
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 
 

+ 1 - 1
archivebox/cli/archivebox_oneshot.py

@@ -10,7 +10,7 @@ from pathlib import Path
 from typing import List, Optional, IO
 
 from ..main import oneshot
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, accept_stdin, stderr
 

+ 1 - 1
archivebox/cli/archivebox_remove.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import remove
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, accept_stdin
 

+ 1 - 1
archivebox/cli/archivebox_schedule.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import schedule
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_server.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import server
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR, BIND_ADDR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_setup.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import setup
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_shell.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import shell
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_status.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import status
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 1
archivebox/cli/archivebox_update.py

@@ -9,7 +9,7 @@ import argparse
 from typing import List, Optional, IO
 
 from ..main import update
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..index import (
     LINK_FILTERS,

+ 1 - 1
archivebox/cli/archivebox_version.py

@@ -9,7 +9,7 @@ import argparse
 from typing import Optional, List, IO
 
 from ..main import version
-from ..util import docstring
+from archivebox.misc.util import docstring
 from ..config.legacy import OUTPUT_DIR
 from ..logging_util import SmartFormatter, reject_stdin
 

+ 1 - 6
archivebox/config/config_stubs.py

@@ -9,17 +9,12 @@ SimpleConfigValueDict = Dict[str, SimpleConfigValue]
 SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
 ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
 
-# class AttrDict(dict):
-#     def __init__(self, *args, **kwargs):
-#         super().__init__(*args, **kwargs)
-#         self.__dict__ = self
-AttrDict = benedict  # https://github.com/fabiocaccamo/python-benedict/
 
 
 class BaseConfig(TypedDict):
     pass
 
-class ConfigDict(BaseConfig, AttrDict, total=False):
+class ConfigDict(BaseConfig, benedict, total=False):
     """
     # Regenerate by pasting this quine into `archivebox shell` 🥚
     from archivebox.config import ConfigDict, CONFIG_DEFAULTS

+ 2 - 2
archivebox/config/constants.py

@@ -173,7 +173,7 @@ class ConstantsDict(Mapping):
     # actually empty so that we dont clobber someone's home directory or desktop by accident.
     # These files are exceptions to the is_empty check when we're trying to init a new dir,
     # as they could be from a previous archivebox version, system artifacts, dependencies, etc.
-    ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset((
+    ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset((
         *INGORED_PATHS,
         *PIP_RELATED_NAMES,
         *NPM_RELATED_NAMES,
@@ -212,7 +212,7 @@ class ConstantsDict(Mapping):
     })
         
     DATA_LOCATIONS = benedict({
-        "OUTPUT_DIR": {
+        "DATA_DIR": {
             "path": DATA_DIR.resolve(),
             "enabled": True,
             "is_valid": DATABASE_FILE.exists(),

+ 1 - 1
archivebox/core/admin.py

@@ -23,7 +23,7 @@ from signal_webhooks.utils import get_webhook_model
 
 from archivebox.config import VERSION
 
-from ..util import htmldecode, urldecode
+from archivebox.misc.util import htmldecode, urldecode
 
 from core.models import Snapshot, ArchiveResult, Tag
 from core.mixins import SearchResultsAdminMixin

+ 1 - 1
archivebox/core/forms.py

@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
 
 from django import forms
 
-from ..util import URL_REGEX
+from archivebox.misc.util import URL_REGEX
 from ..parsers import PARSERS
 from taggit.utils import edit_string_for_tags, parse_tags
 

+ 2 - 2
archivebox/core/models.py

@@ -23,7 +23,7 @@ from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
 from queues.tasks import bg_archive_snapshot
 
 from archivebox.misc.system import get_dir_size
-from ..util import parse_date, base_url
+from archivebox.misc.util import parse_date, base_url
 from ..index.schema import Link
 from ..index.html import snapshot_icons
 from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
@@ -231,7 +231,7 @@ class Snapshot(ABIDModel):
 
     @cached_property
     def extension(self) -> str:
-        from ..util import extension
+        from archivebox.misc.util import extension
         return extension(self.url)
 
     @cached_property

+ 1 - 1
archivebox/core/views.py

@@ -37,7 +37,7 @@ from ..config.legacy import (
     CONFIG,
 )
 from ..logging_util import printable_filesize
-from ..util import base_url, htmlencode, ts_to_date_str
+from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
 from ..search import query_search_index
 from .serve_static import serve_static_with_byterange_support
 

+ 1 - 1
archivebox/extractors/__init__.py

@@ -20,7 +20,7 @@ from ..index import (
     load_link_details,
     write_link_details,
 )
-from ..util import enforce_types
+from archivebox.misc.util import enforce_types
 from ..logging_util import (
     log_archiving_started,
     log_archiving_paused,

+ 1 - 1
archivebox/extractors/archive_org.py

@@ -7,7 +7,7 @@ from collections import defaultdict
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     is_static_file,
     dedupe,

+ 1 - 1
archivebox/extractors/dom.py

@@ -5,7 +5,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file, atomic_write
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     is_static_file,
 )

+ 1 - 1
archivebox/extractors/favicon.py

@@ -6,7 +6,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput
 from archivebox.misc.system import chmod_file, run
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     domain,
     dedupe,

+ 1 - 1
archivebox/extractors/git.py

@@ -6,7 +6,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     is_static_file,
     domain,

+ 1 - 1
archivebox/extractors/headers.py

@@ -6,7 +6,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput
 from archivebox.misc.system import atomic_write
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     get_headers,
     dedupe,

+ 1 - 1
archivebox/extractors/htmltotext.py

@@ -13,7 +13,7 @@ from ..config.legacy import (
 from ..index.schema import Link, ArchiveResult, ArchiveError
 from ..logging_util import TimedProgress
 from archivebox.misc.system import atomic_write
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     is_static_file,
 )

+ 1 - 1
archivebox/extractors/media.py

@@ -5,7 +5,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import enforce_types, is_static_file, dedupe
+from archivebox.misc.util import enforce_types, is_static_file, dedupe
 from ..logging_util import TimedProgress
 
 

+ 1 - 1
archivebox/extractors/mercury.py

@@ -8,7 +8,7 @@ import json
 
 from ..index.schema import Link, ArchiveResult, ArchiveError
 from archivebox.misc.system import run, atomic_write
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     is_static_file,
     dedupe,

+ 1 - 1
archivebox/extractors/pdf.py

@@ -5,7 +5,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     is_static_file,
 )

+ 1 - 1
archivebox/extractors/readability.py

@@ -8,7 +8,7 @@ import json
 
 from ..index.schema import Link, ArchiveResult, ArchiveError
 from archivebox.misc.system import run, atomic_write
-from ..util import enforce_types, is_static_file
+from archivebox.misc.util import enforce_types, is_static_file
 from ..logging_util import TimedProgress
 from .title import get_html
 

+ 1 - 1
archivebox/extractors/screenshot.py

@@ -5,7 +5,7 @@ from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import enforce_types, is_static_file
+from archivebox.misc.util import enforce_types, is_static_file
 from ..logging_util import TimedProgress
 
 

+ 1 - 1
archivebox/extractors/singlefile.py

@@ -7,7 +7,7 @@ import json
 
 from ..index.schema import Link, ArchiveResult, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import enforce_types, is_static_file, dedupe
+from archivebox.misc.util import enforce_types, is_static_file, dedupe
 from ..logging_util import TimedProgress
 
 

+ 1 - 1
archivebox/extractors/title.py

@@ -6,7 +6,7 @@ from pathlib import Path
 from typing import Optional
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     download_url,
     htmldecode,

+ 1 - 1
archivebox/extractors/wget.py

@@ -8,7 +8,7 @@ from datetime import datetime, timezone
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from archivebox.misc.system import run, chmod_file
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     without_fragment,
     without_query,

+ 1 - 1
archivebox/index/__init__.py

@@ -13,7 +13,7 @@ from django.db.models import QuerySet, Q
 
 
 from archivebox.config import DATA_DIR, CONSTANTS, SEARCH_BACKEND_CONFIG
-from ..util import (
+from archivebox.misc.util import (
     scheme,
     enforce_types,
     ExtendedEncoder,

+ 1 - 1
archivebox/index/csv.py

@@ -2,7 +2,7 @@ __package__ = 'archivebox.index'
 
 from typing import List, Optional, Any
 
-from ..util import enforce_types
+from archivebox.misc.util import enforce_types
 from .schema import Link
 
 

+ 1 - 1
archivebox/index/html.py

@@ -11,7 +11,7 @@ from django.core.cache import cache
 from .schema import Link
 from archivebox.misc.system import atomic_write
 from ..logging_util import printable_filesize
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     ts_to_date_str,
     urlencode,

+ 1 - 1
archivebox/index/json.py

@@ -12,7 +12,7 @@ from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL
 
 from .schema import Link
 from archivebox.misc.system import atomic_write
-from ..util import enforce_types
+from archivebox.misc.util import enforce_types
 
 
 

+ 13 - 13
archivebox/index/schema.py

@@ -22,7 +22,7 @@ from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
 from plugins_extractor.favicon.apps import FAVICON_CONFIG
 
 from archivebox.misc.system import get_dir_size
-from ..util import ts_to_date_str, parse_date
+from archivebox.misc.util import ts_to_date_str, parse_date
 
 
 class ArchiveError(Exception):
@@ -67,7 +67,7 @@ class ArchiveResult:
 
     @classmethod
     def guess_ts(_cls, dict_info):
-        from ..util import parse_date
+        from archivebox.misc.util import parse_date
         parsed_timestamp = parse_date(dict_info["timestamp"])
         start_ts = parsed_timestamp
         end_ts = parsed_timestamp + timedelta(seconds=int(dict_info["duration"]))
@@ -75,7 +75,7 @@ class ArchiveResult:
 
     @classmethod
     def from_json(cls, json_info, guess=False):
-        from ..util import parse_date
+        from archivebox.misc.util import parse_date
 
         info = {
             key: val
@@ -231,7 +231,7 @@ class Link:
 
     @classmethod
     def from_json(cls, json_info, guess=False):
-        from ..util import parse_date
+        from archivebox.misc.util import parse_date
         
         info = {
             key: val
@@ -299,38 +299,38 @@ class Link:
     ### URL Helpers
     @property
     def url_hash(self):
-        from ..util import hashurl
+        from archivebox.misc.util import hashurl
 
         return hashurl(self.url)
 
     @property
     def scheme(self) -> str:
-        from ..util import scheme
+        from archivebox.misc.util import scheme
         return scheme(self.url)
 
     @property
     def extension(self) -> str:
-        from ..util import extension
+        from archivebox.misc.util import extension
         return extension(self.url)
 
     @property
     def domain(self) -> str:
-        from ..util import domain
+        from archivebox.misc.util import domain
         return domain(self.url)
 
     @property
     def path(self) -> str:
-        from ..util import path
+        from archivebox.misc.util import path
         return path(self.url)
 
     @property
     def basename(self) -> str:
-        from ..util import basename
+        from archivebox.misc.util import basename
         return basename(self.url)
 
     @property
     def base_url(self) -> str:
-        from ..util import base_url
+        from archivebox.misc.util import base_url
         return base_url(self.url)
 
     ### Pretty Printing Helpers
@@ -380,12 +380,12 @@ class Link:
 
     @property
     def is_static(self) -> bool:
-        from ..util import is_static_file
+        from archivebox.misc.util import is_static_file
         return is_static_file(self.url)
 
     @property
     def is_archived(self) -> bool:
-        from ..util import domain
+        from archivebox.misc.util import domain
 
         output_paths = (
             domain(self.url),

+ 1 - 1
archivebox/index/sql.py

@@ -9,7 +9,7 @@ from django.db.models import QuerySet
 from django.db import transaction
 
 from .schema import Link
-from ..util import enforce_types, parse_date
+from archivebox.misc.util import enforce_types, parse_date
 from ..config.legacy import (
     OUTPUT_DIR,
     TAG_SEPARATOR_PATTERN,

+ 2 - 2
archivebox/logging_util.py

@@ -23,8 +23,8 @@ from rich.panel import Panel
 
 from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG
 from archivebox.misc.system import get_dir_size
-from .util import enforce_types
-from .misc.logging import ANSI, stderr
+from archivebox.misc.util import enforce_types
+from archivebox.misc.logging import ANSI, stderr
 
 @dataclass
 class RuntimeStats:

+ 8 - 10
archivebox/main.py

@@ -28,10 +28,10 @@ from .parsers import (
     save_file_as_source,
     parse_links_memory,
 )
-from .index.schema import Link
-from .util import enforce_types                         # type: ignore
+from archivebox.misc.util import enforce_types                         # type: ignore
 from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
 from archivebox.misc.system import run as run_shell
+from .index.schema import Link
 from .index import (
     load_main_index,
     parse_links_from_source,
@@ -61,14 +61,12 @@ from .index.sql import (
     apply_migrations,
     remove_from_sql_main_index,
 )
-from .index.html import (
-    generate_index_from_links,
-)
+from .index.html import generate_index_from_links
 from .index.csv import links_to_csv
 from .extractors import archive_links, archive_link, ignore_methods
-from .misc.logging import stderr, hint
-from .misc.checks import check_data_folder
-from .config.legacy import (
+from archivebox.misc.logging import stderr, hint
+from archivebox.misc.checks import check_data_folder
+from archivebox.config.legacy import (
     write_config_file,
     DEPENDENCIES,
     load_all_config,
@@ -194,7 +192,7 @@ def version(quiet: bool=False,
             f'PLATFORM={platform.platform()}',
             f'PYTHON={sys.implementation.name.title()}',
         )
-        OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['DATA_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
+        OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
         print(
             f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
             f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
@@ -221,7 +219,7 @@ def version(quiet: bool=False,
                 
         print()
         print('{white}[i] New dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI))
-        for name, binary in settings.BINARIES.items():
+        for name, binary in reversed(list(settings.BINARIES.items())):
             err = None
             try:
                 loaded_bin = binary.load()

+ 3 - 3
archivebox/misc/system.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox'
+__package__ = 'archivebox.misc'
 
 
 import os
@@ -14,8 +14,8 @@ from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedPro
 from crontab import CronTab
 from atomicwrites import atomic_write as lib_atomic_write
 
-from .util import enforce_types, ExtendedEncoder
-from .config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
+from archivebox.config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
+from archivebox.misc.util import enforce_types, ExtendedEncoder
 
 
 def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):

+ 0 - 16
archivebox/util.py → archivebox/misc/util.py

@@ -317,22 +317,6 @@ def dedupe(options: List[str]) -> List[str]:
     return list(deduped.values())
 
 
-class AttributeDict(dict):
-    """Helper to allow accessing dict values via Example.key or Example['key']"""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # Recursively convert nested dicts to AttributeDicts (optional):
-        # for key, val in self.items():
-        #     if isinstance(val, dict) and type(val) is not AttributeDict:
-        #         self[key] = AttributeDict(val)
-
-    def __getattr__(self, attr: str) -> Any:
-        return dict.__getitem__(self, attr)
-
-    def __setattr__(self, attr: str, value: Any) -> None:
-        return dict.__setitem__(self, attr, value)
-
 
 class ExtendedEncoder(pyjson.JSONEncoder):
     """

+ 1 - 1
archivebox/parsers/__init__.py

@@ -22,7 +22,7 @@ from ..config.legacy import (
     stderr,
     hint,
 )
-from ..util import (
+from archivebox.misc.util import (
     basename,
     htmldecode,
     download_url,

+ 1 - 1
archivebox/parsers/generic_html.py

@@ -7,7 +7,7 @@ from typing import IO, Iterable, Optional
 from datetime import datetime, timezone
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
     find_all_urls,

+ 1 - 1
archivebox/parsers/generic_json.py

@@ -6,7 +6,7 @@ from typing import IO, Iterable
 from datetime import datetime, timezone
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
 )

+ 1 - 1
archivebox/parsers/generic_jsonl.py

@@ -5,7 +5,7 @@ import json
 from typing import IO, Iterable
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
 )
 

+ 1 - 1
archivebox/parsers/generic_rss.py

@@ -6,7 +6,7 @@ from time import mktime
 from feedparser import parse as feedparser
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types
 )

+ 1 - 1
archivebox/parsers/generic_txt.py

@@ -6,7 +6,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
     find_all_urls,

+ 1 - 1
archivebox/parsers/medium_rss.py

@@ -7,7 +7,7 @@ from datetime import datetime
 from xml.etree import ElementTree
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
 )

+ 1 - 1
archivebox/parsers/netscape_html.py

@@ -7,7 +7,7 @@ from typing import IO, Iterable
 from datetime import datetime
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
 )

+ 1 - 1
archivebox/parsers/pinboard_rss.py

@@ -6,7 +6,7 @@ from time import mktime
 from feedparser import parse as feedparser
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types
 )

+ 1 - 1
archivebox/parsers/pocket_api.py

@@ -11,7 +11,7 @@ from pocket import Pocket
 from archivebox.config import CONSTANTS
 
 from ..index.schema import Link
-from ..util import enforce_types
+from archivebox.misc.util import enforce_types
 from archivebox.misc.system import atomic_write
 from ..config.legacy import (
     POCKET_CONSUMER_KEY,

+ 1 - 1
archivebox/parsers/pocket_html.py

@@ -7,7 +7,7 @@ from typing import IO, Iterable
 from datetime import datetime
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
 )

+ 1 - 1
archivebox/parsers/readwise_reader_api.py

@@ -11,7 +11,7 @@ from configparser import ConfigParser
 from archivebox.config import CONSTANTS
 
 from ..index.schema import Link
-from ..util import enforce_types
+from archivebox.misc.util import enforce_types
 from archivebox.misc.system import atomic_write
 from ..config.legacy import READWISE_READER_TOKENS
 

+ 1 - 1
archivebox/parsers/shaarli_rss.py

@@ -5,7 +5,7 @@ from typing import IO, Iterable
 from datetime import datetime
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
     str_between,

+ 1 - 1
archivebox/parsers/url_list.py

@@ -7,7 +7,7 @@ from typing import IO, Iterable
 from datetime import datetime, timezone
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     enforce_types,
     URL_REGEX,
 )

+ 1 - 1
archivebox/parsers/wallabag_atom.py

@@ -5,7 +5,7 @@ from typing import IO, Iterable
 from datetime import datetime
 
 from ..index.schema import Link
-from ..util import (
+from archivebox.misc.util import (
     htmldecode,
     enforce_types,
     str_between,

+ 2 - 2
archivebox/plugins_extractor/chrome/apps.py

@@ -3,7 +3,7 @@ __package__ = 'archivebox.plugins_extractor.chrome'
 import sys
 import platform
 from pathlib import Path
-from typing import List, Optional, Dict, ClassVar
+from typing import List, Optional, Dict
 
 # Depends on other PyPI/vendor packages:
 from rich import print
@@ -29,7 +29,7 @@ from archivebox.config import CONSTANTS, ARCHIVING_CONFIG, SHELL_CONFIG
 from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
 from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
 
-from ...util import dedupe
+from archivebox.misc.util import dedupe
 
 
 CHROMIUM_BINARY_NAMES_LINUX = [

+ 45 - 2
archivebox/search/__init__.py

@@ -1,3 +1,5 @@
+__package__ = 'archivebox.search'
+
 from typing import List, Union
 from pathlib import Path
 
@@ -5,12 +7,53 @@ from django.db.models import QuerySet
 from django.conf import settings
 
 from archivebox.index.schema import Link
-from archivebox.util import enforce_types
+from archivebox.misc.util import enforce_types
 from archivebox.misc.logging import stderr
+from archivebox.config.legacy import ANSI
 
 # from archivebox.archivebox.config import settings.CONFIGS.SearchBackendConfig
 
-from .utils import get_indexable_content, log_index_started
+
+def log_index_started(url):
+    print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI))
+    print( )
+
+def get_file_result_content(res, extra_path, use_pwd=False):
+    if use_pwd: 
+        fpath = f'{res.pwd}/{res.output}'
+    else:
+        fpath = f'{res.output}'
+    
+    if extra_path:
+        fpath = f'{fpath}/{extra_path}'
+
+    with open(fpath, 'r', encoding='utf-8') as file:
+        data = file.read()
+    if data:
+        return [data]
+    return []
+
+
+# This should be abstracted by a plugin interface for extractors
+@enforce_types
+def get_indexable_content(results: QuerySet):
+    if not results:
+        return []
+    # Only use the first method available
+    res, method = results.first(), results.first().extractor
+    if method not in ('readability', 'singlefile', 'dom', 'wget'):
+        return []
+    # This should come from a plugin interface
+
+    # TODO: banish this duplication and get these from the extractor file
+    if method == 'readability':
+        return get_file_result_content(res, 'content.txt', use_pwd=True)
+    elif method == 'singlefile':
+        return get_file_result_content(res, '', use_pwd=True)
+    elif method == 'dom':
+        return get_file_result_content(res, '', use_pwd=True)
+    elif method == 'wget':
+        return get_file_result_content(res, '', use_pwd=True)
 
 
 def import_backend():

+ 0 - 45
archivebox/search/utils.py

@@ -1,45 +0,0 @@
-from django.db.models import QuerySet
-
-from archivebox.util import enforce_types
-from archivebox.config.legacy import ANSI
-
-def log_index_started(url):
-    print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI))
-    print( )
-
-def get_file_result_content(res, extra_path, use_pwd=False):
-    if use_pwd: 
-        fpath = f'{res.pwd}/{res.output}'
-    else:
-        fpath = f'{res.output}'
-    
-    if extra_path:
-        fpath = f'{fpath}/{extra_path}'
-
-    with open(fpath, 'r', encoding='utf-8') as file:
-        data = file.read()
-    if data:
-        return [data]
-    return []
-
-
-# This should be abstracted by a plugin interface for extractors
-@enforce_types
-def get_indexable_content(results: QuerySet):
-    if not results:
-        return []
-    # Only use the first method available
-    res, method = results.first(), results.first().extractor
-    if method not in ('readability', 'singlefile', 'dom', 'wget'):
-        return []
-    # This should come from a plugin interface
-
-    # TODO: banish this duplication and get these from the extractor file
-    if method == 'readability':
-        return get_file_result_content(res, 'content.txt', use_pwd=True)
-    elif method == 'singlefile':
-        return get_file_result_content(res, '', use_pwd=True)
-    elif method == 'dom':
-        return get_file_result_content(res, '', use_pwd=True)
-    elif method == 'wget':
-        return get_file_result_content(res, '', use_pwd=True)