# archivebox_list.py (4.4 KB)
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. __command__ = 'archivebox list'
  4. import sys
  5. import argparse
  6. from pathlib import Path
  7. from typing import Optional, List, IO
  8. from archivebox.misc.util import docstring
  9. from archivebox.config import DATA_DIR
  10. from ..main import list_all
  11. from ..index import (
  12. LINK_FILTERS,
  13. get_indexed_folders,
  14. get_archived_folders,
  15. get_unarchived_folders,
  16. get_present_folders,
  17. get_valid_folders,
  18. get_invalid_folders,
  19. get_duplicate_folders,
  20. get_orphaned_folders,
  21. get_corrupted_folders,
  22. get_unrecognized_folders,
  23. )
  24. from ..logging_util import SmartFormatter, reject_stdin, stderr
  25. @docstring(list_all.__doc__)
  26. def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
  27. parser = argparse.ArgumentParser(
  28. prog=__command__,
  29. description=list_all.__doc__,
  30. add_help=True,
  31. formatter_class=SmartFormatter,
  32. )
  33. group = parser.add_mutually_exclusive_group()
  34. group.add_argument(
  35. '--csv', #'-c',
  36. type=str,
  37. help="Print the output in CSV format with the given columns, e.g.: timestamp,url,extension",
  38. default=None,
  39. )
  40. group.add_argument(
  41. '--json', #'-j',
  42. action='store_true',
  43. help="Print the output in JSON format with all columns included",
  44. )
  45. group.add_argument(
  46. '--html',
  47. action='store_true',
  48. help="Print the output in HTML format"
  49. )
  50. parser.add_argument(
  51. '--with-headers',
  52. action='store_true',
  53. help='Include the headers in the output document'
  54. )
  55. parser.add_argument(
  56. '--sort', #'-s',
  57. type=str,
  58. help="List the links sorted using the given key, e.g. timestamp or updated",
  59. default=None,
  60. )
  61. parser.add_argument(
  62. '--before', #'-b',
  63. type=float,
  64. help="List only links bookmarked before (less than) the given timestamp",
  65. default=None,
  66. )
  67. parser.add_argument(
  68. '--after', #'-a',
  69. type=float,
  70. help="List only links bookmarked after (greater than or equal to) the given timestamp",
  71. default=None,
  72. )
  73. parser.add_argument(
  74. '--status',
  75. type=str,
  76. choices=('indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid', 'duplicate', 'orphaned', 'corrupted', 'unrecognized'),
  77. default='indexed',
  78. help=(
  79. 'List only links or data directories that have the given status\n'
  80. f' indexed {get_indexed_folders.__doc__} (the default)\n'
  81. f' archived {get_archived_folders.__doc__}\n'
  82. f' unarchived {get_unarchived_folders.__doc__}\n'
  83. '\n'
  84. f' present {get_present_folders.__doc__}\n'
  85. f' valid {get_valid_folders.__doc__}\n'
  86. f' invalid {get_invalid_folders.__doc__}\n'
  87. '\n'
  88. f' duplicate {get_duplicate_folders.__doc__}\n'
  89. f' orphaned {get_orphaned_folders.__doc__}\n'
  90. f' corrupted {get_corrupted_folders.__doc__}\n'
  91. f' unrecognized {get_unrecognized_folders.__doc__}\n'
  92. )
  93. )
  94. parser.add_argument(
  95. '--filter-type', '-t',
  96. type=str,
  97. choices=(*LINK_FILTERS.keys(), 'search'),
  98. default='exact',
  99. help='Type of pattern matching to use when filtering URLs',
  100. )
  101. parser.add_argument(
  102. 'filter_patterns',
  103. nargs='*',
  104. type=str,
  105. default=None,
  106. help='List only URLs matching these filter patterns'
  107. )
  108. command = parser.parse_args(args or ())
  109. reject_stdin(stdin)
  110. if command.with_headers and not (command.json or command.html or command.csv):
  111. stderr(
  112. '[X] --with-headers can only be used with --json, --html or --csv options\n',
  113. color='red',
  114. )
  115. raise SystemExit(2)
  116. matching_folders = list_all(
  117. filter_patterns=command.filter_patterns,
  118. filter_type=command.filter_type,
  119. status=command.status,
  120. after=command.after,
  121. before=command.before,
  122. sort=command.sort,
  123. csv=command.csv,
  124. json=command.json,
  125. html=command.html,
  126. with_headers=command.with_headers,
  127. out_dir=Path(pwd) if pwd else DATA_DIR,
  128. )
  129. raise SystemExit(not matching_folders)
  130. if __name__ == '__main__':
  131. main(args=sys.argv[1:], stdin=sys.stdin)