logs.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. import os
  2. import sys
  3. from datetime import datetime
  4. from dataclasses import dataclass
  5. from typing import Optional
  6. from .schema import Link, ArchiveResult
  7. from .config import ANSI, OUTPUT_DIR
  @dataclass
  class RuntimeStats:
      """Mutable record of per-link outcome tallies and stage timestamps.

      The log_* helpers in this module write into one shared instance of this
      class so the end-of-run summary can report totals and elapsed time.
      """

      # number of links that ended up in each outcome bucket
      skipped: int = 0
      succeeded: int = 0
      failed: int = 0

      # start/end of the parsing stage (None until that stage is logged)
      parse_start_ts: Optional[datetime] = None
      parse_end_ts: Optional[datetime] = None

      # start/end of the index-writing stage
      index_start_ts: Optional[datetime] = None
      index_end_ts: Optional[datetime] = None

      # start/end of the archiving stage
      archiving_start_ts: Optional[datetime] = None
      archiving_end_ts: Optional[datetime] = None


  # globals are bad, mmkay
  _LAST_RUN_STATS = RuntimeStats()
  def pretty_path(path: str) -> str:
      """Shorten a path under the current working directory to a ``./`` form.

      For example ``<cwd>/output/abc`` becomes ``./output/abc``. A path that
      does not start with the working directory is returned unchanged.

      Args:
          path: the path string to shorten for display.

      Returns:
          The display-friendly path string.
      """
      cwd_prefix = os.path.abspath('.') + '/'
      # Only strip a *leading* cwd; a plain str.replace() would also rewrite the
      # cwd string wherever it happens to appear later in the path.
      if path.startswith(cwd_prefix):
          return './' + path[len(cwd_prefix):]
      return path
  ### Parsing Stage
  def log_parsing_started(source_file: str):
      """Record the parse start time and announce which source file is being parsed.

      Only the final path component of ``source_file`` is shown in the message.
      """
      now = datetime.now()
      _LAST_RUN_STATS.parse_start_ts = now

      stamp = now.strftime('%Y-%m-%d %H:%M:%S')
      file_name = source_file.rsplit('/', 1)[-1]
      template = '{green}[*] [{}] Parsing new links from output/sources/{}...{reset}'
      print(template.format(stamp, file_name, **ANSI))
  def log_parsing_finished(num_parsed: int, num_new_links: int, parser_name: str):
      """Record the parse end time and print how many links were parsed and added."""
      _LAST_RUN_STATS.parse_end_ts = datetime.now()
      summary = ' > Parsed {} links as {} ({} new links added)'.format(
          num_parsed,
          parser_name,
          num_new_links,
      )
      print(summary)
  ### Indexing Stage
  def log_indexing_process_started():
      """Record the index start time and announce that index files are being saved."""
      now = datetime.now()
      _LAST_RUN_STATS.index_start_ts = now

      stamp = now.strftime('%Y-%m-%d %H:%M:%S')
      template = '{green}[*] [{}] Saving main index files...{reset}'
      print(template.format(stamp, **ANSI))
  def log_indexing_started(out_dir: str, out_file: str):
      """Write an in-progress marker for the index file about to be saved.

      The marker is written without a trailing newline so that the line can be
      overwritten later (``log_indexing_finished`` starts its output with ``\\r``).

      Args:
          out_dir: directory the index file is written to (shortened for display).
          out_file: name of the index file.
      """
      sys.stdout.write(' > {}/{}'.format(pretty_path(out_dir), out_file))
      # No newline was written, so a line-buffered stdout would hold this text
      # back; flush so the marker is visible while the file is being saved.
      sys.stdout.flush()
  def log_indexing_finished(out_dir: str, out_file: str):
      """Record the index end time and overwrite the current line with a done marker."""
      _LAST_RUN_STATS.index_end_ts = datetime.now()
      shown_dir = pretty_path(out_dir)
      # the leading carriage return moves the cursor back to the line start
      print('\r √ {}/{}'.format(shown_dir, out_file))
  ### Archiving Stage
  def log_archiving_started(num_links: int, resume: Optional[float]):
      """Record the archiving start time and announce the update (or resume) run.

      A falsy ``resume`` value selects the plain "Updating content" message;
      otherwise the resume point is included in the message.
      """
      now = datetime.now()
      _LAST_RUN_STATS.archiving_start_ts = now
      stamp = now.strftime('%Y-%m-%d %H:%M:%S')

      if not resume:
          fresh_msg = '{green}[▶] [{}] Updating content for {} pages in archive...{reset}'
          print(fresh_msg.format(stamp, num_links, **ANSI))
          return

      resume_msg = '{green}[▶] [{}] Resuming archive updating for {} pages starting from {}...{reset}'
      print(resume_msg.format(stamp, num_links, resume, **ANSI))
  def log_archiving_paused(num_links: int, idx: int, timestamp: str):
      """Record the archiving end time and print how to resume from the paused link.

      ``idx`` is displayed one-based (``idx + 1``) against ``num_links``.
      """
      paused_at = datetime.now()
      _LAST_RUN_STATS.archiving_end_ts = paused_at

      print()
      banner = '\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'
      print(banner.format(
          **ANSI,
          now=paused_at.strftime('%Y-%m-%d %H:%M:%S'),
          idx=idx+1,
          timestamp=timestamp,
          total=num_links,
      ))

      follow_up = (
          ' To view your archive, open:',
          ' {}/index.html'.format(OUTPUT_DIR),
          ' Continue archiving where you left off by running:',
          ' archivebox {}'.format(timestamp),
      )
      for text in follow_up:
          print(text)
  def log_archiving_finished(num_links: int):
      """Record the archiving end time and print the end-of-run summary.

      The summary shows the elapsed time since the recorded archiving start
      (in minutes when longer than 60 seconds, otherwise in seconds), the
      skipped/updated/errored link counts, and where to open the archive.

      Args:
          num_links: number of pages reported as updated in the headline.

      Raises:
          AssertionError: if no archiving start time was recorded beforehand.
      """
      end_ts = datetime.now()
      _LAST_RUN_STATS.archiving_end_ts = end_ts

      # the start time is Optional; this also narrows the type for checkers
      assert _LAST_RUN_STATS.archiving_start_ts is not None, \
          'log_archiving_started() must be called before log_archiving_finished()'

      seconds = end_ts.timestamp() - _LAST_RUN_STATS.archiving_start_ts.timestamp()
      # precision comes from the ':.2f' spec; no extra format argument is needed
      if seconds > 60:
          duration = '{0:.2f} min'.format(seconds / 60)
      else:
          duration = '{0:.2f} sec'.format(seconds)

      print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
          ANSI['green'],
          end_ts.strftime('%Y-%m-%d %H:%M:%S'),
          num_links,
          duration,
          ANSI['reset'],
      ))
      print(' - {} links skipped'.format(_LAST_RUN_STATS.skipped))
      print(' - {} links updated'.format(_LAST_RUN_STATS.succeeded))
      print(' - {} links had errors'.format(_LAST_RUN_STATS.failed))
      print(' To view your archive, open:')
      print(' {}/index.html'.format(OUTPUT_DIR))
  def log_link_archiving_started(link: Link, link_dir: str, is_new: bool):
      """Print the three-line header shown before a link is archived.

      New links get a green '+' marker and a '>' before the directory; links
      that are not new get a black '√' for both.
      """
      # Example output:
      # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford"
      # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/
      # > output/archive/1478739709
      if is_new:
          color_key, marker, dir_marker = 'green', '+', '>'
      else:
          color_key, marker, dir_marker = 'black', '√', '√'

      header = '\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'
      print(header.format(
          symbol_color=ANSI[color_key],
          symbol=marker,
          now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
          title=link.title or link.base_url,
          **ANSI,
      ))
      print(' {blue}{url}{reset}'.format(url=link.url, **ANSI))
      print(' {} {}'.format(dir_marker, pretty_path(link_dir)))
  def log_link_archiving_finished(link: Link, link_dir: str, is_new: bool, stats: dict):
      """Bump exactly one run-level counter based on this link's per-method stats.

      The link counts as failed if any method failed, as skipped if every
      counted method was skipped, and as succeeded otherwise.
      """
      total = sum(stats.values())

      if stats['failed'] > 0:
          bucket = 'failed'
      elif stats['skipped'] == total:
          bucket = 'skipped'
      else:
          bucket = 'succeeded'

      setattr(_LAST_RUN_STATS, bucket, getattr(_LAST_RUN_STATS, bucket) + 1)
  def log_archive_method_started(method: str):
      """Print the name of the archive method that is about to run."""
      line = ' > {}'.format(method)
      print(line)
  def log_archive_method_finished(result: ArchiveResult):
      """Print a failure report for an archive method result.

      Nothing is printed unless ``result.status == 'failed'``. The report shows
      the error class name (with 'ArchiveError' removed) and message, hint lines
      taken from ``result.output.hints`` when that attribute is present, and a
      shell-quoted command the user can copy-paste to see the full output.

      Args:
          result: the finished method result; ``status``, ``output``, ``cmd``
              and ``pwd`` are read from it.
      """
      if result.status != 'failed':
          return

      import shlex  # function-scope import keeps this block self-contained

      # shlex.quote() escapes every shell-special character (spaces, '&', ';',
      # quotes, '$', ...), not just spaces, so the printed command really is
      # safe to paste into a shell.
      quoted_cmd = ' '.join(shlex.quote(arg) for arg in result.cmd)

      # Prettify error output hints: take the first five entries, drop blanks
      hints = getattr(result.output, 'hints', None) or ()
      if hints:
          hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n')
          hints = (
              ' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
              for line in hints[:5] if line.strip()
          )

      # Collect the report lines; each gets an indentation prefix when printed
      output_lines = [
          '{lightred}Failed:{reset}'.format(**ANSI),
          ' {reset}{} {red}{}{reset}'.format(
              result.output.__class__.__name__.replace('ArchiveError', ''),
              result.output,
              **ANSI,
          ),
          *hints,
          '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']),
          # quote the directory too, so a pwd containing spaces still works
          *([' cd {};'.format(shlex.quote(str(result.pwd)))] if result.pwd else []),
          ' {}'.format(quoted_cmd),
      ]
      print('\n'.join(
          ' {}'.format(line)
          for line in output_lines
          if line
      ))
      print()