codeowners_hint.py 11 KB


  1. """
  2. Copyright (c) Contributors to the Open 3D Engine Project.
  3. For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. SPDX-License-Identifier: Apache-2.0 OR MIT
  5. Tools for inspecting GitHub CODEOWNERS files
  6. """
  7. import argparse
  8. import logging
  9. import os
  10. import pathlib
  11. import re
# module-level logger used for every warning emitted by this tool
logger = logging.getLogger(__name__)
# fallback contact which is printed when no owner can be determined (default of the --default_alias option)
_DEFAULT_CODEOWNER_ALIAS = "https://www.o3de.org/community/"
# CODEOWNERS files larger than this many bytes are reported and skipped instead of parsed
_GITHUB_CODEOWNERS_BYTE_LIMIT = 3 * 1024 * 1024 # 3MB
  15. def get_codeowners(target_path: pathlib.PurePath) -> (str|None, str|None, pathlib.PurePath|None):
  16. """
  17. Finds ownership information matching the target filesystem path from a CODEOWNERS file found in its GitHub repo
  18. :param target_path: path to match in a GitHub CODEOWNERS file, which will be discovered inside its repo
  19. :return: tuple of (matched_path_entry, matched_owner_aliases, found_codeowners_path) which are empty when missing
  20. """
  21. codeowners_path = find_github_codeowners(target_path)
  22. matched_path, owner_aliases = get_codeowners_from(target_path, codeowners_path)
  23. return matched_path, owner_aliases, codeowners_path
  24. def find_github_codeowners(target_path: pathlib.PurePath) -> pathlib.Path|None:
  25. """
  26. Finds the '.github/CODEOWNERS' file for the git repo containing target_path, scanning upward through the filesystem
  27. :param target_path: a path expected to exist in a GitHub repository containing a CODEOWNERS file
  28. :return: path to the CODEOWNERS file, or None if no file could be located
  29. """
  30. current_path = target_path
  31. for _ in range(1000):
  32. codeowners_path = os.path.join(current_path, ".github", "CODEOWNERS")
  33. if os.path.exists(codeowners_path):
  34. return pathlib.Path(codeowners_path)
  35. next_path = os.path.dirname(current_path)
  36. if next_path == current_path:
  37. break # reached filesystem root
  38. current_path = next_path
  39. logger.warning(f"No GitHub CODEOWNERS file found in a GitHub repo which contains {target_path}")
  40. return None
  41. def get_codeowners_from(target_path: pathlib.PurePath, codeowners_path: pathlib.PurePath) -> (str, str):
  42. """
  43. Fetch ownership information matching the target filesystem path from a CODEOWNERS file
  44. :param target_path: path to match in the GitHub CODEOWNERS file
  45. :param codeowners_path: path to CODEOWNERS file
  46. :return: tuple of (matched_path_entry, matched_owner_aliases), which will be empty when nothing was matched.
  47. The aliases will also be empty when a matched path is explicitly unowned.
  48. """
  49. if not os.path.isfile(codeowners_path):
  50. logger.warning(f"No GitHub CODEOWNERS file found at {codeowners_path}")
  51. return "", ""
  52. if os.path.getsize(codeowners_path) > _GITHUB_CODEOWNERS_BYTE_LIMIT:
  53. logger.warning(f"GitHub CODEOWNERS file found at {codeowners_path} exceeds the standard limit of "
  54. f"{_GITHUB_CODEOWNERS_BYTE_LIMIT} bytes")
  55. return "", ""
  56. # operate only on unix-style separators
  57. repo_root = pathlib.PurePosixPath(codeowners_path.parent.parent)
  58. unix_normalized_target = pathlib.PurePosixPath(target_path)
  59. if not unix_normalized_target.is_relative_to(repo_root):
  60. logger.warning(f"Path '{target_path}' is not inside the repo of GitHub CODEOWNERS file {codeowners_path}")
  61. return "", ""
  62. repo_relative_target = unix_normalized_target.relative_to(repo_root)
  63. repo_rooted_target = pathlib.PurePosixPath("/" + str(repo_relative_target)) # relative_to removes leading slash
  64. with open(codeowners_path) as codeowners_file:
  65. # GitHub syntax only applies the final matching rule ==> parse in reverse order and take first match
  66. for line in reversed(list(codeowners_file)):
  67. clean_line = line.strip()
  68. if clean_line and not clean_line.startswith('#'): # ignore blanks and full-line comments
  69. # entry format should be "owned/path/ @alias1 @alias2 [email protected] @aliasN..."
  70. split_entry = line.split(maxsplit=1)
  71. owned_path = split_entry[0]
  72. if _codeowners_path_matches(repo_rooted_target, owned_path):
  73. if len(split_entry) > 1:
  74. aliases = split_entry[1].split("#", maxsplit=1)[0].strip() # remove trailing comment
  75. else: # explicitly unowned entry with no comment
  76. aliases = ""
  77. return owned_path, aliases
  78. # else invalid entry syntax, ignore
  79. return "", "" # no match found
  80. def _codeowners_path_matches(target_path: pathlib.PurePosixPath, owned_path: str) -> bool:
  81. """
  82. :param target_path: PurePosixPath to match against, which starts from the root of the repo
  83. :param owned_path: path identifier found in a GitHub CODEOWNERS file (relative to root, may contain wildcards)
  84. :return: True when target_path is contained by the rules of owned_path
  85. """
  86. matched = False
  87. if '*' in owned_path or '?' in owned_path: # wildcards require glob matching
  88. if owned_path.startswith("*"): # special simple case for global wildcards
  89. matched = target_path.match(owned_path)
  90. elif owned_path.startswith("/"): # ownership of specific directory: glob A against B
  91. matched = target_path.match(owned_path[1:])
  92. else: # ownership of all relative directories: find non-wildcard portions of B in A, glob the remainders
  93. asterisk = owned_path.find("*")
  94. question = owned_path.find("?")
  95. if asterisk > -1 and question > -1:
  96. first_wildcard_index = min(asterisk, question)
  97. else: # avoid not-found index
  98. first_wildcard_index = max(asterisk, question)
  99. separator_indices = [index.start() for index in re.finditer(pattern="/", string=owned_path)]
  100. pre_wildcard_separator_index = 0
  101. for s_index in separator_indices:
  102. if s_index < first_wildcard_index:
  103. pre_wildcard_separator_index = s_index
  104. else: # remainder are all greater
  105. break
  106. # separate non-wildcard-containing path from remainder
  107. pre_wildcard_owned = owned_path[:pre_wildcard_separator_index]
  108. wildcard_with_remainder_owned = owned_path[pre_wildcard_separator_index+1:]
  109. # find substrings of initial portion of B within A
  110. target_str = str(target_path)
  111. pre_wildcard_target_end_indices = [index.end() for index in
  112. re.finditer(pattern=pre_wildcard_owned, string=target_str)]
  113. # glob remainders of A against remainder of B
  114. for target_index in pre_wildcard_target_end_indices: # may be multiple substring matches within target
  115. target_remainder = target_str[target_index:]
  116. if pathlib.PurePosixPath(target_remainder).match(wildcard_with_remainder_owned):
  117. matched = True
  118. break # exit early on success
  119. else: # simple path matching
  120. if owned_path.startswith("/"): # ownership of specific directory: verify if A exists inside B
  121. matched = target_path.is_relative_to(owned_path)
  122. else: # ownership of all relative directories: verify if B is a substring of A
  123. matched = owned_path in str(target_path)
  124. return matched
  125. def _pretty_print_success(print_fn, found_codeowners_path, matched_path, owner_aliases) -> None:
  126. """
  127. Prints a friendly message, instead of the default terse output of owner alias(es)
  128. :param print_fn: function to call when logging strings
  129. :param found_codeowners_path: verified path to a GitHub CODEOWNERS file
  130. :param matched_path: first part of an entry matched in the CODEOWNERS file
  131. :param owner_aliases: second part of an entry in a CODEOWNERS file
  132. """
  133. print_fn(f"Matched '{matched_path}' in file {found_codeowners_path}")
  134. print_fn(f"For additional support please reach out to: {owner_aliases}")
  135. def _pretty_print_failure(print_fn, found_codeowners_path, matched_path, original_target,
  136. default_alias=_DEFAULT_CODEOWNER_ALIAS) -> None:
  137. """
  138. Prints a friendly message about failure to find an owner
  139. :param print_fn: function to call when logging strings
  140. :param found_codeowners_path: verified path to a GitHub CODEOWNERS file which, empty when missing
  141. :param matched_path: entry matched in the CODEOWNERS file, empty when not matched
  142. :param original_target: the path which matching was attempted on
  143. :param default_alias: who to contact as no owner was found
  144. """
  145. if not found_codeowners_path:
  146. print_fn(f"No GitHub CODEOWNERS file was found for '{original_target}'")
  147. else:
  148. if not matched_path:
  149. print_fn(f"No ownership information for '{original_target}' found in file {found_codeowners_path}")
  150. else:
  151. print_fn(f"Ownership for '{matched_path}' is explicitly empty in file {found_codeowners_path}")
  152. print_fn(f"For additional support please reach out to: {default_alias}")
  153. def _main() -> int:
  154. parser = argparse.ArgumentParser(description="Display GitHub CODEOWNERS information to stdout for a target path")
  155. parser.add_argument('target', metavar='T', type=pathlib.Path,
  156. help="file path to find an owner for")
  157. parser.add_argument('-c', '--codeowners', type=pathlib.Path,
  158. help="path to a GitHub CODEOWNERS file, when not set this will scan upward to find the repo "
  159. "containing the target")
  160. parser.add_argument('-d', '--default_alias', default=_DEFAULT_CODEOWNER_ALIAS,
  161. help="a default location to reach out for support, for when ownership cannot be determined")
  162. parser.add_argument('-p', '--pretty_print', action='store_true',
  163. help="output ownership info as a friendly message instead of only alias(es)")
  164. parser.add_argument('-s', '--silent', action='store_true',
  165. help="Suppress any warning messages and only print ownership information")
  166. args = parser.parse_args()
  167. if args.silent:
  168. logging.disable()
  169. else:
  170. logging.basicConfig()
  171. if args.codeowners:
  172. matched_path, owner_aliases = get_codeowners_from(args.target, args.codeowners)
  173. found_codeowners = os.path.isfile(args.codeowners)
  174. else:
  175. matched_path, owner_aliases, found_codeowners = get_codeowners(args.target)
  176. if owner_aliases and matched_path and found_codeowners:
  177. if args.pretty_print:
  178. _pretty_print_success(print, found_codeowners, matched_path, owner_aliases)
  179. else:
  180. print(owner_aliases)
  181. return 0
  182. else:
  183. if args.pretty_print:
  184. _pretty_print_failure(print, found_codeowners, matched_path, args.target, args.default_alias)
  185. else:
  186. print(args.default_alias)
  187. return 1
  188. logger.error("Unexpected abnormal exit")
  189. return -1