codeowners_hint.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. """
  2. Copyright (c) Contributors to the Open 3D Engine Project.
  3. For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. SPDX-License-Identifier: Apache-2.0 OR MIT
  5. Tools for inspecting GitHub CODEOWNERS files
  6. """
  7. import argparse
  8. import logging
  9. import os
  10. import pathlib
  11. import re
  12. logger = logging.getLogger(__name__)
  13. _DEFAULT_CODEOWNER_ALIAS = "https://www.o3de.org/community/"
  14. _GITHUB_CODEOWNERS_BYTE_LIMIT = 3 * 1024 * 1024 # 3MB
  15. def get_codeowners(target_path: pathlib.PurePath) -> (str|None, str|None, pathlib.PurePath|None):
  16. """
  17. Finds ownership information matching the target filesystem path from a CODEOWNERS file found in its GitHub repo
  18. :param target_path: path to match in a GitHub CODEOWNERS file, which will be discovered inside its repo
  19. :return: tuple of (matched_path_entry, matched_owner_aliases, found_codeowners_path) which are empty when missing
  20. """
  21. codeowners_path = find_github_codeowners(target_path)
  22. matched_path, owner_aliases = get_codeowners_from(target_path, codeowners_path)
  23. return matched_path, owner_aliases, codeowners_path
  24. def find_github_codeowners(target_path: pathlib.PurePath) -> pathlib.Path|None:
  25. """
  26. Finds the '.github/CODEOWNERS' file for the git repo containing target_path, scanning upward through the filesystem
  27. :param target_path: a path expected to exist in a GitHub repository containing a CODEOWNERS file
  28. :return: path to the CODEOWNERS file, or None if no file could be located
  29. """
  30. current_path = target_path
  31. for _ in range(1000):
  32. codeowners_path = os.path.join(current_path, ".github", "CODEOWNERS")
  33. if os.path.exists(codeowners_path):
  34. return pathlib.Path(codeowners_path)
  35. next_path = os.path.dirname(current_path)
  36. if next_path == current_path:
  37. break # reached filesystem root
  38. current_path = next_path
  39. logger.warning(f"No GitHub CODEOWNERS file found in a GitHub repo which contains {target_path}")
  40. return None
  41. def get_codeowners_from(target_path: pathlib.PurePath, codeowners_path: pathlib.PurePath) -> (str, str):
  42. """
  43. Fetch ownership information matching the target filesystem path from a CODEOWNERS file
  44. :param target_path: path to match in the GitHub CODEOWNERS file
  45. :param codeowners_path: path to CODEOWNERS file
  46. :return: tuple of (matched_path_entry, matched_owner_aliases), which will be empty when nothing was matched.
  47. The aliases will also be empty when a matched path is explicitly unowned.
  48. """
  49. if not os.path.isfile(codeowners_path):
  50. logger.warning(f"No GitHub CODEOWNERS file found at {codeowners_path}")
  51. return "", ""
  52. if os.path.getsize(codeowners_path) > _GITHUB_CODEOWNERS_BYTE_LIMIT:
  53. logger.warning(f"GitHub CODEOWNERS file found at {codeowners_path} exceeds the standard limit of "
  54. f"{_GITHUB_CODEOWNERS_BYTE_LIMIT} bytes")
  55. return "", ""
  56. # operate only on unix-style separators
  57. repo_root = pathlib.PurePosixPath(codeowners_path.parent.parent)
  58. unix_normalized_target = pathlib.PurePosixPath(target_path)
  59. if not unix_normalized_target.is_relative_to(repo_root):
  60. logger.warning(f"Path '{target_path}' is not inside the repo of GitHub CODEOWNERS file {codeowners_path}")
  61. return "", ""
  62. repo_relative_target = unix_normalized_target.relative_to(repo_root)
  63. repo_rooted_target = pathlib.PurePosixPath("/" + str(repo_relative_target)) # relative_to removes leading slash
  64. with open(codeowners_path) as codeowners_file:
  65. # GitHub syntax only applies the final matching rule ==> parse in reverse order and take first match
  66. for line in reversed(list(codeowners_file)):
  67. clean_line = line.strip()
  68. if clean_line and not clean_line.startswith('#'): # ignore blanks and full-line comments
  69. # entry format should be "owned/path/ @alias1 @alias2 [email protected] @aliasN..."
  70. split_entry = line.split(maxsplit=1)
  71. owned_path = split_entry[0]
  72. if _codeowners_path_matches(repo_rooted_target, owned_path):
  73. if len(split_entry) > 1:
  74. aliases = split_entry[1].split("#", maxsplit=1)[0].strip() # remove trailing comment
  75. else: # explicitly unowned entry with no comment
  76. aliases = ""
  77. return owned_path, aliases
  78. # else invalid entry syntax, ignore
  79. return "", "" # no match found
  80. def _codeowners_path_matches(target_path: pathlib.PurePosixPath, owned_path: str) -> bool:
  81. """
  82. :param target_path: PurePosixPath to match against, which starts from the root of the repo
  83. :param owned_path: path identifier found in a GitHub CODEOWNERS file (relative to root, may contain wildcards)
  84. :return: True when target_path is contained by the rules of owned_path
  85. """
  86. matched = False
  87. if '*' in owned_path or '?' in owned_path: # wildcards require glob matching
  88. if owned_path.startswith("*"): # special simple case for global wildcards
  89. matched = target_path.match(owned_path)
  90. elif owned_path.startswith("/"): # ownership of specific directory: glob A against B
  91. matched = target_path.match(owned_path[1:])
  92. else: # ownership of all relative directories: find non-wildcard portions of B in A, glob the remainders
  93. asterisk = owned_path.find("*")
  94. question = owned_path.find("?")
  95. if asterisk > -1 and question > -1:
  96. first_wildcard_index = min(asterisk, question)
  97. else: # avoid not-found index
  98. first_wildcard_index = max(asterisk, question)
  99. separator_indices = [index.start() for index in re.finditer(pattern="/", string=owned_path)]
  100. pre_wildcard_separator_index = 0
  101. for s_index in separator_indices:
  102. if s_index < first_wildcard_index:
  103. pre_wildcard_separator_index = s_index
  104. else: # remainder are all greater
  105. break
  106. # separate non-wildcard-containing path from remainder
  107. pre_wildcard_owned = owned_path[:pre_wildcard_separator_index]
  108. wildcard_with_remainder_owned = owned_path[pre_wildcard_separator_index+1:]
  109. # find substrings of initial portion of B within A
  110. target_str = str(target_path)
  111. pre_wildcard_target_end_indices = [index.end() for index in
  112. re.finditer(pattern=pre_wildcard_owned, string=target_str)]
  113. # glob remainders of A against remainder of B
  114. for target_index in pre_wildcard_target_end_indices: # may be multiple substring matches within target
  115. target_remainder = target_str[target_index:]
  116. if pathlib.PurePosixPath(target_remainder).match(wildcard_with_remainder_owned):
  117. matched = True
  118. break # exit early on success
  119. else: # simple path matching
  120. if owned_path.startswith("/"): # ownership of specific directory: verify if A exists inside B
  121. matched = target_path.is_relative_to(owned_path)
  122. else: # ownership of all relative directories: verify if B is a substring of A
  123. matched = owned_path in str(target_path)
  124. return matched
  125. def _pretty_print_success(print_fn, found_codeowners_path, matched_path, owner_aliases) -> None:
  126. """
  127. Prints a friendly message, instead of the default terse output of owner alias(es)
  128. :param print_fn: function to call when logging strings
  129. :param found_codeowners_path: verified path to a GitHub CODEOWNERS file
  130. :param matched_path: first part of an entry matched in the CODEOWNERS file
  131. :param owner_aliases: second part of an entry in a CODEOWNERS file
  132. """
  133. print_fn(f"Matched '{matched_path}' in file {found_codeowners_path}")
  134. print_fn(f"For additional support please reach out to: {owner_aliases}")
  135. def _pretty_print_failure(print_fn, found_codeowners_path, matched_path, original_target,
  136. default_alias=_DEFAULT_CODEOWNER_ALIAS) -> None:
  137. """
  138. Prints a friendly message about failure to find an owner
  139. :param print_fn: function to call when logging strings
  140. :param found_codeowners_path: verified path to a GitHub CODEOWNERS file which, empty when missing
  141. :param matched_path: entry matched in the CODEOWNERS file, empty when not matched
  142. :param original_target: the path which matching was attempted on
  143. :param default_alias: who to contact as no owner was found
  144. """
  145. if not found_codeowners_path:
  146. print_fn(f"No GitHub CODEOWNERS file was found for '{original_target}'")
  147. else:
  148. if not matched_path:
  149. print_fn(f"No ownership information for '{original_target}' found in file {found_codeowners_path}")
  150. else:
  151. print_fn(f"Ownership for '{matched_path}' is explicitly empty in file {found_codeowners_path}")
  152. print_fn(f"For additional support please reach out to: {default_alias}")
  153. def _main() -> int:
  154. parser = argparse.ArgumentParser(description="Display GitHub CODEOWNERS information to stdout for a target path")
  155. parser.add_argument('target', metavar='T', type=pathlib.Path,
  156. help="file path to find an owner for")
  157. parser.add_argument('-c', '--codeowners', type=pathlib.Path,
  158. help="path to a GitHub CODEOWNERS file, when not set this will scan upward to find the repo "
  159. "containing the target")
  160. parser.add_argument('-d', '--default_alias', default=_DEFAULT_CODEOWNER_ALIAS,
  161. help="a default location to reach out for support, for when ownership cannot be determined")
  162. parser.add_argument('-p', '--pretty_print', action='store_true',
  163. help="output ownership info as a friendly message instead of only alias(es)")
  164. parser.add_argument('-s', '--silent', action='store_true',
  165. help="Suppress any warning messages and only print ownership information")
  166. args = parser.parse_args()
  167. if args.silent:
  168. logging.disable()
  169. else:
  170. logging.basicConfig()
  171. if args.codeowners:
  172. matched_path, owner_aliases = get_codeowners_from(args.target, args.codeowners)
  173. found_codeowners = os.path.isfile(args.codeowners)
  174. else:
  175. matched_path, owner_aliases, found_codeowners = get_codeowners(args.target)
  176. if owner_aliases and matched_path and found_codeowners:
  177. if args.pretty_print:
  178. _pretty_print_success(print, found_codeowners, matched_path, owner_aliases)
  179. else:
  180. print(owner_aliases)
  181. return 0
  182. else:
  183. if args.pretty_print:
  184. _pretty_print_failure(print, found_codeowners, matched_path, args.target, args.default_alias)
  185. else:
  186. print(args.default_alias)
  187. return 1
  188. logger.error("Unexpected abnormal exit")
  189. return -1