check_copyright.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #!/usr/bin/env python
  2. # coding=utf-8
  3. # Copyright (c) 2016 Google Inc.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """Checks for copyright notices in all the files that need them under the
  17. current directory. Optionally insert them. When inserting, replaces
  18. an MIT or Khronos free use license with Apache 2.
  19. """
  20. import argparse
  21. import fileinput
  22. import fnmatch
  23. import inspect
  24. import os
  25. import re
  26. import sys
  27. # List of designated copyright owners.
  28. AUTHORS = ['The Khronos Group Inc.',
  29. 'LunarG Inc.',
  30. 'Google Inc.',
  31. 'Google LLC',
  32. 'Pierre Moreau',
  33. 'Samsung Inc',
  34. 'André Perez Maselco',
  35. 'Vasyl Teliman',
  36. 'Advanced Micro Devices, Inc.',
  37. 'Stefano Milizia']
  38. CURRENT_YEAR='2020'
  39. YEARS = '(2014-2016|2015-2016|2015-2020|2016|2016-2017|2017|2017-2019|2018|2019|2020|2021)'
  40. COPYRIGHT_RE = re.compile(
  41. 'Copyright \(c\) {} ({})'.format(YEARS, '|'.join(AUTHORS)))
  42. MIT_BEGIN_RE = re.compile('Permission is hereby granted, '
  43. 'free of charge, to any person obtaining a')
  44. MIT_END_RE = re.compile('MATERIALS OR THE USE OR OTHER DEALINGS IN '
  45. 'THE MATERIALS.')
  46. APACHE2_BEGIN_RE = re.compile('Licensed under the Apache License, '
  47. 'Version 2.0 \(the "License"\);')
  48. APACHE2_END_RE = re.compile('limitations under the License.')
  49. LICENSED = """Licensed under the Apache License, Version 2.0 (the "License");
  50. you may not use this file except in compliance with the License.
  51. You may obtain a copy of the License at
  52. http://www.apache.org/licenses/LICENSE-2.0
  53. Unless required by applicable law or agreed to in writing, software
  54. distributed under the License is distributed on an "AS IS" BASIS,
  55. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  56. See the License for the specific language governing permissions and
  57. limitations under the License."""
  58. LICENSED_LEN = 10 # Number of lines in LICENSED
  59. def find(top, filename_glob, skip_glob_dir_list, skip_glob_files_list):
  60. """Returns files in the tree rooted at top matching filename_glob but not
  61. in directories matching skip_glob_dir_list nor files matching
  62. skip_glob_dir_list."""
  63. file_list = []
  64. for path, dirs, files in os.walk(top):
  65. for glob in skip_glob_dir_list:
  66. for match in fnmatch.filter(dirs, glob):
  67. dirs.remove(match)
  68. for filename in fnmatch.filter(files, filename_glob):
  69. full_file = os.path.join(path, filename)
  70. if full_file not in skip_glob_files_list:
  71. file_list.append(full_file)
  72. return file_list
  73. def filtered_descendants(glob):
  74. """Returns glob-matching filenames under the current directory, but skips
  75. some irrelevant paths."""
  76. return find('.', glob, ['third_party', 'external', 'CompilerIdCXX',
  77. 'build*', 'out*'], ['./utils/clang-format-diff.py'])
  78. def skip(line):
  79. """Returns true if line is all whitespace or shebang."""
  80. stripped = line.lstrip()
  81. return stripped == '' or stripped.startswith('#!')
  82. def comment(text, prefix):
  83. """Returns commented-out text.
  84. Each line of text will be prefixed by prefix and a space character. Any
  85. trailing whitespace will be trimmed.
  86. """
  87. accum = ['{} {}'.format(prefix, line).rstrip() for line in text.split('\n')]
  88. return '\n'.join(accum)
  89. def insert_copyright(author, glob, comment_prefix):
  90. """Finds all glob-matching files under the current directory and inserts the
  91. copyright message, and license notice. An MIT license or Khronos free
  92. use license (modified MIT) is replaced with an Apache 2 license.
  93. The copyright message goes into the first non-whitespace, non-shebang line
  94. in a file. The license notice follows it. Both are prefixed on each line
  95. by comment_prefix and a space.
  96. """
  97. copyright = comment('Copyright (c) {} {}'.format(CURRENT_YEAR, author),
  98. comment_prefix) + '\n\n'
  99. licensed = comment(LICENSED, comment_prefix) + '\n\n'
  100. for file in filtered_descendants(glob):
  101. # Parsing states are:
  102. # 0 Initial: Have not seen a copyright declaration.
  103. # 1 Seen a copyright line and no other interesting lines
  104. # 2 In the middle of an MIT or Khronos free use license
  105. # 9 Exited any of the above
  106. state = 0
  107. update_file = False
  108. for line in fileinput.input(file, inplace=1):
  109. emit = True
  110. if state is 0:
  111. if COPYRIGHT_RE.search(line):
  112. state = 1
  113. elif skip(line):
  114. pass
  115. else:
  116. # Didn't see a copyright. Inject copyright and license.
  117. sys.stdout.write(copyright)
  118. sys.stdout.write(licensed)
  119. # Assume there isn't a previous license notice.
  120. state = 1
  121. elif state is 1:
  122. if MIT_BEGIN_RE.search(line):
  123. state = 2
  124. emit = False
  125. elif APACHE2_BEGIN_RE.search(line):
  126. # Assume an Apache license is preceded by a copyright
  127. # notice. So just emit it like the rest of the file.
  128. state = 9
  129. elif state is 2:
  130. # Replace the MIT license with Apache 2
  131. emit = False
  132. if MIT_END_RE.search(line):
  133. state = 9
  134. sys.stdout.write(licensed)
  135. if emit:
  136. sys.stdout.write(line)
  137. def alert_if_no_copyright(glob, comment_prefix):
  138. """Prints names of all files missing either a copyright or Apache 2 license.
  139. Finds all glob-matching files under the current directory and checks if they
  140. contain the copyright message and license notice. Prints the names of all the
  141. files that don't meet both criteria.
  142. Returns the total number of file names printed.
  143. """
  144. printed_count = 0
  145. for file in filtered_descendants(glob):
  146. has_copyright = False
  147. has_apache2 = False
  148. line_num = 0
  149. apache_expected_end = 0
  150. with open(file, encoding='utf-8') as contents:
  151. for line in contents:
  152. line_num += 1
  153. if COPYRIGHT_RE.search(line):
  154. has_copyright = True
  155. if APACHE2_BEGIN_RE.search(line):
  156. apache_expected_end = line_num + LICENSED_LEN
  157. if (line_num is apache_expected_end) and APACHE2_END_RE.search(line):
  158. has_apache2 = True
  159. if not (has_copyright and has_apache2):
  160. message = file
  161. if not has_copyright:
  162. message += ' has no copyright'
  163. if not has_apache2:
  164. message += ' has no Apache 2 license notice'
  165. print(message)
  166. printed_count += 1
  167. return printed_count
  168. class ArgParser(argparse.ArgumentParser):
  169. def __init__(self):
  170. super(ArgParser, self).__init__(
  171. description=inspect.getdoc(sys.modules[__name__]))
  172. self.add_argument('--update', dest='author', action='store',
  173. help='For files missing a copyright notice, insert '
  174. 'one for the given author, and add a license '
  175. 'notice. The author must be in the AUTHORS '
  176. 'list in the script.')
  177. def main():
  178. glob_comment_pairs = [('*.h', '//'), ('*.hpp', '//'), ('*.sh', '#'),
  179. ('*.py', '#'), ('*.cpp', '//'),
  180. ('CMakeLists.txt', '#')]
  181. argparser = ArgParser()
  182. args = argparser.parse_args()
  183. if args.author:
  184. if args.author not in AUTHORS:
  185. print('error: --update argument must be in the AUTHORS list in '
  186. 'check_copyright.py: {}'.format(AUTHORS))
  187. sys.exit(1)
  188. for pair in glob_comment_pairs:
  189. insert_copyright(args.author, *pair)
  190. sys.exit(0)
  191. else:
  192. count = sum([alert_if_no_copyright(*p) for p in glob_comment_pairs])
  193. sys.exit(count > 0)
  194. if __name__ == '__main__':
  195. main()