convert_git_renames_to_csv.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
"""Uses git to list files that were renamed between two revisions and converts
that to a CSV table.

Use it to prepare and double-check data for create_redirects.py.
"""
  5. import subprocess
  6. import argparse
  7. import csv
  8. import sys
  9. try:
  10. subprocess.check_output(["git", "--version"])
  11. except subprocess.CalledProcessError:
  12. print("Git not found. It's required to run this program.")
  13. def parse_command_line_args():
  14. parser = argparse.ArgumentParser(
  15. description="Uses git to list files that were renamed between two revisions and "
  16. "converts that to a CSV table. Use it to prepare and double-check data for create_redirects.py."
  17. )
  18. parser.add_argument(
  19. "revision1",
  20. type=str,
  21. help="Start revision to get renamed files from.",
  22. )
  23. parser.add_argument(
  24. "revision2",
  25. type=str,
  26. help="End revision to get renamed files from.",
  27. )
  28. parser.add_argument("-f", "--output-file", type=str, help="Path to the output file")
  29. return parser.parse_args()
  30. def main():
  31. args = parse_command_line_args()
  32. assert args.revision1 != args.revision2, "Revisions must be different."
  33. for revision in [args.revision1, args.revision2]:
  34. assert not "/" in revision, "Revisions must be local branches only."
  35. # Ensure that both revisions are present in the local repository.
  36. for revision in [args.revision1, args.revision2]:
  37. try:
  38. subprocess.check_output(
  39. ["git", "rev-list", f"HEAD..{revision}"], stderr=subprocess.STDOUT
  40. )
  41. except subprocess.CalledProcessError:
  42. print(
  43. f"Revision {revision} not found in this repository. "
  44. "Please make sure that both revisions exist locally in your git repository."
  45. )
  46. exit(1)
  47. # Get the list of renamed files between the two revisions.
  48. renamed_files = (
  49. subprocess.check_output(
  50. [
  51. "git",
  52. "diff",
  53. "--name-status",
  54. "--diff-filter=R",
  55. args.revision1,
  56. args.revision2,
  57. ]
  58. )
  59. .decode("utf-8")
  60. .split("\n")
  61. )
  62. renamed_documents = [f for f in renamed_files if f.endswith(".rst")]
  63. csv_data: list[dict] = []
  64. branch = args.revision2
  65. for document in renamed_documents:
  66. _, source, destination = document.split("\t")
  67. csv_data.append(
  68. {"source": source, "destination": destination, "branch": branch}
  69. )
  70. if args.output_file:
  71. with open(args.output_file, "w") as f:
  72. writer = csv.DictWriter(f, fieldnames=csv_data[0].keys()).writerows(
  73. csv_data
  74. )
  75. writer.writeheader()
  76. writer.writerows(csv_data)
  77. else:
  78. writer = csv.DictWriter(sys.stdout, fieldnames=csv_data[0].keys())
  79. writer.writeheader()
  80. writer.writerows(csv_data)
  81. if __name__ == "__main__":
  82. main()