
fix rich logging issues

Nick Sweeting 1 year ago
parent commit e99260feb2

+ 0 - 1
archivebox/api/models.py

@@ -1,6 +1,5 @@
 __package__ = 'archivebox.api'
 
-import uuid
 import secrets
 from datetime import timedelta
 

+ 1 - 1
archivebox/core/admin.py

@@ -30,7 +30,7 @@ from core.models import Snapshot, ArchiveResult, Tag
 from core.mixins import SearchResultsAdminMixin
 from api.models import APIToken
 from abid_utils.admin import ABIDModelAdmin
-from queues.tasks import bg_archive_links, bg_add
+from queues.tasks import bg_archive_links, bg_archive_link, bg_add
 
 from index.html import snapshot_icons
 from logging_util import printable_filesize
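
Aside: the newly imported bg_archive_link presumably backs a per-snapshot admin action defined elsewhere in core/admin.py; the action body itself is outside this diff. A minimal sketch of how such an action could be wired up (the action name and the bg_archive_link call shape here are assumptions, not code from this commit):

    from django.contrib import admin
    from queues.tasks import bg_archive_link  # imported by this commit

    @admin.action(description='Re-archive selected snapshots in the background')
    def queue_rearchiving(modeladmin, request, queryset):
        for snapshot in queryset:
            # hypothetical call shape: enqueue one link for background archiving
            bg_archive_link(snapshot.as_link(), overwrite=True)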

+ 5 - 0
archivebox/core/models.py

@@ -19,6 +19,8 @@ from django.conf import settings
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
 
+from queues.tasks import bg_archive_snapshot
+
 from ..system import get_dir_size
 from ..util import parse_date, base_url
 from ..index.schema import Link
@@ -160,6 +162,9 @@ class Snapshot(ABIDModel):
         
         super().save(*args, **kwargs)
 
+    def archive(self, overwrite=False, methods=None):
+        result = bg_archive_snapshot(self, overwrite=overwrite, methods=methods)
+        return result
 
     def __repr__(self) -> str:
         title = (self.title_stripped or '-')[:64]
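
The new Snapshot.archive() helper defers to bg_archive_snapshot instead of running extractors inline, so the caller gets back whatever handle the queue returns rather than blocking. A usage sketch, e.g. from `archivebox shell` (the extractor names are ArchiveBox's usual method names, and the type of `result` depends on bg_archive_snapshot in queues.tasks, which this diff only imports):

    from core.models import Snapshot

    snapshot = Snapshot.objects.get(url='https://example.com')
    # queues the snapshot for (re-)archiving in the background
    result = snapshot.archive(overwrite=True, methods=['title', 'favicon', 'wget'])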

+ 4 - 1
archivebox/core/settings_logging.py

@@ -1,3 +1,4 @@
+__package__ = 'archivebox.core'
 import re
 import tempfile
 import logging
@@ -7,6 +8,8 @@ import django.template
 
 import archivebox
 
+from ..misc.logging import IS_TTY
+
 
 IGNORABLE_URL_PATTERNS = [
     re.compile(r"/.*/?apple-touch-icon.*\.png"),
@@ -101,7 +104,7 @@ SETTINGS_LOGGING = {
             "formatter": "rich",
             "level": "DEBUG",
             "markup": False,
-            "rich_tracebacks": True,
+            "rich_tracebacks": IS_TTY,
             "filters": ["noisyrequestsfilter"],
             "tracebacks_suppress": [
                 django,
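
Gating rich_tracebacks on IS_TTY keeps Rich from drawing boxed, syntax-highlighted traceback panels into non-interactive output (piped logs, supervisor-captured stderr), where they become noise. The same pattern in isolation, using Rich's public RichHandler API (a standalone sketch, not the ArchiveBox config itself):

    import logging
    import sys

    from rich.logging import RichHandler

    # Fancy tracebacks only when stderr is a real terminal; redirected
    # output gets plain, grep-friendly tracebacks instead.
    IS_TTY = sys.stderr.isatty()

    logging.basicConfig(
        level="DEBUG",
        format="%(message)s",
        handlers=[RichHandler(rich_tracebacks=IS_TTY, markup=False)],
    )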

+ 29 - 47
archivebox/logging_util.py

@@ -242,7 +242,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
         args=args,
     )
     # stderr()
-    # stderr('{black}    > {pwd}{reset}'.format(pwd=pwd, **ANSI))
+    # stderr('[bright_black]    > {pwd}[/]'.format(pwd=pwd, **ANSI))
     # stderr()
     if SHOW_PROGRESS:
         print(Panel(version_msg), file=sys.stderr)
@@ -254,12 +254,11 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
 
 def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: bool):
     _LAST_RUN_STATS.parse_start_ts = datetime.now(timezone.utc)
-    print('{green}[+] [{}] Adding {} links to index (crawl depth={}){}...{reset}'.format(
+    print('[green][+] [{}] Adding {} links to index (crawl depth={}){}...[/]'.format(
         _LAST_RUN_STATS.parse_start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         len(urls) if isinstance(urls, list) else len(urls.split('\n')),
         depth,
         ' (index only)' if index_only else '',
-        **ANSI,
     ))
 
 def log_source_saved(source_file: str):
@@ -275,7 +274,7 @@ def log_deduping_finished(num_new_links: int):
 
 def log_crawl_started(new_links):
     print()
-    print('{green}[*] Starting crawl of {} sites 1 hop out from starting point{reset}'.format(len(new_links), **ANSI))
+    print(f'[green][*] Starting crawl of {len(new_links)} sites 1 hop out from starting point[/]')
 
 ### Indexing Stage
 
@@ -283,10 +282,9 @@ def log_indexing_process_started(num_links: int):
     start_ts = datetime.now(timezone.utc)
     _LAST_RUN_STATS.index_start_ts = start_ts
     print()
-    print('{black}[*] [{}] Writing {} links to main index...{reset}'.format(
+    print('[bright_black][*] [{}] Writing {} links to main index...[/]'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         num_links,
-        **ANSI,
     ))
 
 
@@ -312,17 +310,15 @@ def log_archiving_started(num_links: int, resume: Optional[float]=None):
     _LAST_RUN_STATS.archiving_start_ts = start_ts
     print()
     if resume:
-        print('{green}[▶] [{}] Resuming archive updating for {} pages starting from {}...{reset}'.format(
+        print('[green][▶] [{}] Resuming archive updating for {} pages starting from {}...[/]'.format(
             start_ts.strftime('%Y-%m-%d %H:%M:%S'),
             num_links,
             resume,
-             **ANSI,
         ))
     else:
-        print('{green}[▶] [{}] Starting archiving of {} snapshots in index...{reset}'.format(
+        print('[green][▶] [{}] Starting archiving of {} snapshots in index...[/]'.format(
             start_ts.strftime('%Y-%m-%d %H:%M:%S'),
             num_links,
-             **ANSI,
         ))
 
 def log_archiving_paused(num_links: int, idx: int, timestamp: str):
@@ -330,8 +326,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
     end_ts = datetime.now(timezone.utc)
     _LAST_RUN_STATS.archiving_end_ts = end_ts
     print()
-    print('\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
-        **ANSI,
+    print('\n[yellow3][X] [{now}] Downloading paused on link {timestamp} ({idx}/{total})[/]'.format(
         now=end_ts.strftime('%Y-%m-%d %H:%M:%S'),
         idx=idx+1,
         timestamp=timestamp,
@@ -355,12 +350,10 @@ def log_archiving_finished(num_links: int):
         duration = '{0:.2f} sec'.format(seconds)
 
     print()
-    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
-        ANSI['green'],
+    print('[green][√] [{}] Update of {} pages complete ({})[/]'.format(
         end_ts.strftime('%Y-%m-%d %H:%M:%S'),
         num_links,
         duration,
-        ANSI['reset'],
     ))
     print('    - {} links skipped'.format(_LAST_RUN_STATS.skipped))
     print('    - {} links updated'.format(_LAST_RUN_STATS.succeeded + _LAST_RUN_STATS.failed))
@@ -368,7 +361,7 @@ def log_archiving_finished(num_links: int):
     
     if Snapshot.objects.count() < 50:
         print()
-        print('    {lightred}Hint:{reset} To manage your archive in a Web UI, run:'.format(**ANSI))
+        print('    [violet]Hint:[/] To manage your archive in a Web UI, run:')
         print('        archivebox server 0.0.0.0:8000')
 
 
@@ -378,14 +371,13 @@ def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool):
     #     http://www.benstopford.com/2015/02/14/log-structured-merge-trees/
     #     > output/archive/1478739709
 
-    print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format(
-        symbol_color=ANSI['green' if is_new else 'black'],
+    print('\n[[{symbol_color}]{symbol}[/]] [[{symbol_color}]{now}[/]] "{title}"'.format(
+        symbol_color='green' if is_new else 'bright_black',
         symbol='+' if is_new else '√',
         now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
         title=link.title or link.base_url,
-        **ANSI,
     ))
-    print('    {blue}{url}{reset}'.format(url=link.url, **ANSI))
+    print(f'    [sky_blue1]{link.url}[/]')
     print('    {} {}'.format(
         '>' if is_new else '√',
         pretty_path(link_dir),
@@ -408,7 +400,7 @@ def log_link_archiving_finished(link: "Link", link_dir: str, is_new: bool, stats
 
     end_ts = datetime.now(timezone.utc)
     duration = str(end_ts - start_ts).split('.')[0]
-    print('        {black}{} files ({}) in {}s {reset}'.format(size[2], printable_filesize(size[0]), duration, **ANSI))
+    print('        [bright_black]{} files ({}) in {}s [/]'.format(size[2], printable_filesize(size[0]), duration))
 
 
 def log_archive_method_started(method: str):
@@ -429,16 +421,13 @@ def log_archive_method_finished(result: "ArchiveResult"):
         if result.output.__class__.__name__ == 'TimeoutExpired':
             duration = (result.end_ts - result.start_ts).seconds
             hint_header = [
-                '{lightyellow}Extractor timed out after {}s.{reset}'.format(duration, **ANSI),
+                f'[yellow3]Extractor timed out after {duration}s.[/]',
             ]
         else:
+            error_name = result.output.__class__.__name__.replace('ArchiveError', '')
             hint_header = [
-                '{lightyellow}Extractor failed:{reset}'.format(**ANSI),
-                '    {reset}{} {red}{}{reset}'.format(
-                    result.output.__class__.__name__.replace('ArchiveError', ''),
-                    result.output, 
-                    **ANSI,
-                ),
+                '[yellow3]Extractor failed:[/]',
+                f'    {error_name} [red1]{result.output}[/]',
             ]
         
         # import pudb; pudb.set_trace()
@@ -454,7 +443,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
                 hints = hints.split('\n')
 
             hints = (
-                '    {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
+                f'    [yellow1]{line.strip()}[/]'
                 for line in list(hints)[:5] if line.strip()
             )
 
@@ -468,7 +457,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
         output_lines = [
             *hint_header,
             *hints,
-            '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']),
+            '[violet]Run to see full output:[/]',
             *docker_hints,
             *(['    cd {};'.format(result.pwd)] if result.pwd else []),
             '    {}'.format(quoted_cmd),
@@ -482,10 +471,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
 
 
 def log_list_started(filter_patterns: Optional[List[str]], filter_type: str):
-    print('{green}[*] Finding links in the archive index matching these {} patterns:{reset}'.format(
-        filter_type,
-        **ANSI,
-    ))
+    print(f'[green][*] Finding links in the archive index matching these {filter_type} patterns:[/]')
     print('    {}'.format(' '.join(filter_patterns or ())))
 
 def log_list_finished(links):
@@ -498,7 +484,7 @@ def log_list_finished(links):
 
 
 def log_removal_started(links: List["Link"], yes: bool, delete: bool):
-    print('{lightyellow}[i] Found {} matching URLs to remove.{reset}'.format(len(links), **ANSI))
+    print(f'[yellow3][i] Found {len(links)} matching URLs to remove.[/]')
     if delete:
         file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
         print(
@@ -513,7 +499,7 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
 
     if not yes:
         print()
-        print('{lightyellow}[?] Do you want to proceed with removing these {} links?{reset}'.format(len(links), **ANSI))
+        print(f'[yellow3][?] Do you want to proceed with removing these {len(links)} links?[/]')
         try:
             assert input('    y/[n]: ').lower() == 'y'
         except (KeyboardInterrupt, EOFError, AssertionError):
@@ -522,28 +508,24 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
 def log_removal_finished(all_links: int, to_remove: int):
     if all_links == 0:
         print()
-        print('{red}[X] No matching links found.{reset}'.format(**ANSI))
+        print('[red1][X] No matching links found.[/]')
     else:
         print()
-        print('{red}[√] Removed {} out of {} links from the archive index.{reset}'.format(
-            to_remove,
-            all_links,
-            **ANSI,
-        ))
-        print('    Index now contains {} links.'.format(all_links - to_remove))
+        print(f'[red1][√] Removed {to_remove} out of {all_links} links from the archive index.[/]')
+        print(f'    Index now contains {all_links - to_remove} links.')
 
 
 def log_shell_welcome_msg():
     from .cli import CLI_SUBCOMMANDS
 
-    print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
-    print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
-    print('{green}from cli import *\n    {}{reset}'.format("\n    ".join(CLI_SUBCOMMANDS.keys()), **ANSI))
+    print('[green]# ArchiveBox Imports[/]')
+    print('[green]from core.models import Snapshot, ArchiveResult, Tag, User[/]')
+    print('[green]from cli import *\n    {}[/]'.format("\n    ".join(CLI_SUBCOMMANDS.keys())))
     print()
     print('[i] Welcome to the ArchiveBox Shell!')
     print('    https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
     print()
-    print('    {lightred}Hint:{reset} Example use:'.format(**ANSI))
+    print('    [violet]Hint:[/] Example use:')
     print('        print(Snapshot.objects.filter(is_archived=True).count())')
     print('        Snapshot.objects.get(url="https://example.com").as_json()')
     print('        add("https://example.com/some/new/url")')
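
The recurring change in logging_util.py: '{green}...{reset}'.format(**ANSI) interpolation of raw escape codes is replaced with Rich console markup ('[green]...[/]'). This assumes print in this module resolves to Rich's print (imported elsewhere in logging_util, not shown in this diff), which renders the tags as colors on a TTY and strips them when output is piped. A before/after sketch with a stand-in ANSI dict:

    from rich import print as rprint

    # Before: raw escape codes spliced in via a dict of ANSI sequences,
    # which leak into logs whenever output is redirected.
    ANSI = {'green': '\033[32m', 'reset': '\033[0m'}  # stand-in for the old dict
    num_links = 42
    print('{green}[+] Adding {} links to index...{reset}'.format(num_links, **ANSI))

    # After: Rich markup tags; rendered as color on a TTY, dropped cleanly
    # when writing to a pipe or file.
    rprint(f'[green][+] Adding {num_links} links to index...[/]')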