create_redirects.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. #!/usr/bin/env python3
  2. """Manages page redirects for the Godot documentation on ReadTheDocs. (https://docs.godotengine.org)
  3. Note that RTD redirects only apply in case of 404 errors, and to all branches and languages:
  4. https://docs.readthedocs.io/en/stable/user-defined-redirects.html.
  5. If this ever changes, we need to rework how we manage these (likely adding per-branch logic).
  6. How to use:
  7. - Install requirements: pip3 install -r requirements.txt
  8. - Store your API token in the RTD_AUTH_TOKEN environment variable or in
  9. a .env file (the latter requires the package python-dotenv)
  10. - Generate new redirects from two git revisions using convert_git_renames_to_csv.py
  11. - Run this script
  12. Example:
  13. python convert_git_renames_to_csv.py stable 3.4 >> redirects.csv
  14. python create_redirects.py
  15. This would add all files that were renamed in 3.4 from stable to redirects.csv,
  16. and then create the redirects on RTD accordingly.
  17. Care is taken to not add redirects that already exist on RTD.
  18. """
  19. import argparse
  20. import csv
  21. import os
  22. import time
  23. import requests
  24. from requests.models import default_hooks
  25. from requests.adapters import HTTPAdapter
  26. from requests.packages.urllib3.util.retry import Retry
  27. RTD_AUTH_TOKEN = ""
  28. REQUEST_HEADERS = ""
  29. REDIRECT_URL = "https://readthedocs.org/api/v3/projects/godot/redirects/"
  30. USER_AGENT = "Godot RTD Redirects on Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
  31. DEFAULT_PAGINATED_SIZE = 1024
  32. API_SLEEP_TIME = 0.2 # Seconds.
  33. REDIRECT_SUFFIXES = [".html", "/"]
  34. TIMEOUT_SECONDS = 5
  35. HTTP = None
  36. def parse_command_line_args():
  37. parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  38. parser.add_argument(
  39. "-f",
  40. "--file",
  41. metavar="file",
  42. default="redirects.csv",
  43. type=str,
  44. help="Path to a CSV file used to keep a list of redirects, containing two columns: source and destination.",
  45. )
  46. parser.add_argument(
  47. "--delete",
  48. action="store_true",
  49. help="Deletes all currently setup 'page' and 'exact' redirects on ReadTheDocs.",
  50. )
  51. parser.add_argument(
  52. "--dry-run",
  53. action="store_true",
  54. help="Safe mode: Run the program and output information without any calls to the ReadTheDocs API.",
  55. )
  56. parser.add_argument(
  57. "--dump",
  58. action="store_true",
  59. help="Only dumps or deletes (if --delete) existing RTD redirects, skips submission.",
  60. )
  61. parser.add_argument(
  62. "-v",
  63. "--verbose",
  64. action="store_true",
  65. help="Enables verbose output.",
  66. )
  67. return parser.parse_args()
  68. def make_redirect(source, destination, args, retry=0):
  69. json_data = {"from_url": source, "to_url": destination, "type": "page"}
  70. headers = REQUEST_HEADERS
  71. if args.verbose:
  72. print("POST " + REDIRECT_URL, headers, json_data)
  73. if args.dry_run:
  74. print(f"Created redirect {source} -> {destination} (DRY RUN)")
  75. return
  76. response = HTTP.post(
  77. REDIRECT_URL,
  78. json=json_data,
  79. headers=headers,
  80. timeout=TIMEOUT_SECONDS
  81. )
  82. if response.status_code == 201:
  83. print(f"Created redirect {source} -> {destination}")
  84. elif response.status_code == 429 and retry<5:
  85. retry += 1
  86. time.sleep(retry*retry)
  87. make_redirect(source, destination, args, retry)
  88. return
  89. else:
  90. print(
  91. f"Failed to create redirect {source} -> {destination}. "
  92. f"Status code: {response.status_code}"
  93. )
  94. exit(1)
  95. def sleep():
  96. time.sleep(API_SLEEP_TIME)
  97. def id(from_url, to_url):
  98. return from_url + " -> " + to_url
  99. def get_paginated(url, parameters={"limit": DEFAULT_PAGINATED_SIZE}):
  100. entries = []
  101. count = -1
  102. while True:
  103. data = HTTP.get(
  104. url,
  105. headers=REQUEST_HEADERS,
  106. params=parameters,
  107. timeout=TIMEOUT_SECONDS
  108. )
  109. if data.status_code != 200:
  110. if data.status_code == 401:
  111. print("Access denied, check RTD API key in RTD_AUTH_TOKEN!")
  112. print("Error accessing RTD API: " + url + ": " + str(data.status_code))
  113. exit(1)
  114. else:
  115. json = data.json()
  116. if json["count"] and count < 0:
  117. count = json["count"]
  118. entries.extend(json["results"])
  119. next = json["next"]
  120. if next and len(next) > 0 and next != url:
  121. url = next
  122. sleep()
  123. continue
  124. if count > 0 and len(entries) != count:
  125. print(
  126. "Mismatch getting paginated content from " + url + ": " +
  127. "expected " + str(count) + " items, got " + str(len(entries)))
  128. exit(1)
  129. return entries
  130. def delete_redirect(id):
  131. url = REDIRECT_URL + str(id)
  132. data = HTTP.delete(url, headers=REQUEST_HEADERS, timeout=TIMEOUT_SECONDS)
  133. if data.status_code != 204:
  134. print("Error deleting redirect with ID", id, "- code:", data.status_code)
  135. exit(1)
  136. else:
  137. print("Deleted redirect", id, "on RTD.")
  138. def get_existing_redirects(delete=False):
  139. redirs = get_paginated(REDIRECT_URL)
  140. existing = []
  141. for redir in redirs:
  142. if redir["type"] != "page":
  143. print(
  144. "Ignoring redirect (only type 'page' is handled): #" +
  145. str(redir["pk"]) + " " + id(redir["from_url"], redir["to_url"]) +
  146. " on ReadTheDocs is '" + redir["type"] + "'. "
  147. )
  148. continue
  149. if delete:
  150. delete_redirect(redir["pk"])
  151. sleep()
  152. else:
  153. existing.append([redir["from_url"], redir["to_url"]])
  154. return existing
  155. def set_auth(token):
  156. global RTD_AUTH_TOKEN
  157. RTD_AUTH_TOKEN = token
  158. global REQUEST_HEADERS
  159. REQUEST_HEADERS = {"Authorization": f"token {RTD_AUTH_TOKEN}", "User-Agent": USER_AGENT}
  160. def load_auth():
  161. try:
  162. import dotenv
  163. dotenv.load_dotenv()
  164. except:
  165. print("Failed to load dotenv. If you want to use .env files, install the dotenv.")
  166. token = os.environ.get("RTD_AUTH_TOKEN", "")
  167. if len(token) < 1:
  168. print("Missing auth token in RTD_AUTH_TOKEN env var or .env file not found. Aborting.")
  169. exit(1)
  170. set_auth(token)
  171. def has_suffix(s, suffixes):
  172. for suffix in suffixes:
  173. if s.endswith(suffix):
  174. return True
  175. return False
  176. def is_valid_redirect_url(url):
  177. if len(url) < len("/a"):
  178. return False
  179. if not has_suffix(url.lower(), REDIRECT_SUFFIXES):
  180. return False
  181. return True
  182. def redirect_to_str(item):
  183. return id(item[0], item[1])
  184. def main():
  185. args = parse_command_line_args()
  186. if not args.dry_run:
  187. load_auth()
  188. retry_strategy = Retry(
  189. total=3,
  190. status_forcelist=[429, 500, 502, 503, 504],
  191. backoff_factor=2,
  192. method_whitelist=["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"]
  193. )
  194. adapter = HTTPAdapter(max_retries=retry_strategy)
  195. global HTTP
  196. HTTP = requests.Session()
  197. HTTP.mount("https://", adapter)
  198. HTTP.mount("http://", adapter)
  199. to_add = []
  200. redirects_file = []
  201. with open(args.file, "r", encoding="utf-8") as f:
  202. redirects_file = list(csv.DictReader(f))
  203. if len(redirects_file) > 0:
  204. assert redirects_file[0].keys() == {
  205. "source",
  206. "destination",
  207. }, "CSV file must have a header and two columns: source, destination."
  208. for row in redirects_file:
  209. to_add.append([row["source"], row["destination"]])
  210. print("Loaded", len(redirects_file), "redirects from", args.file + ".")
  211. existing = []
  212. if not args.dry_run:
  213. existing = get_existing_redirects(args.delete)
  214. print("Loaded", len(existing), "existing redirects from RTD.")
  215. print("Total redirects:", str(len(to_add)) +
  216. " (+" + str(len(existing)), "existing.)")
  217. redirects = []
  218. added = {}
  219. sources = {}
  220. for redirect in to_add:
  221. if len(redirect) != 2:
  222. print("Invalid redirect:", redirect, "- expected 2 elements, got:", len(redirect))
  223. continue
  224. if redirect[0] == redirect[1]:
  225. print("Invalid redirect:", redirect, "- redirects to itself!")
  226. continue
  227. if not is_valid_redirect_url(redirect[0]) or not is_valid_redirect_url(redirect[1]):
  228. print("Invalid redirect:", redirect, "- invalid URL!")
  229. continue
  230. if not redirect[0].startswith("/") or not redirect[1].startswith("/"):
  231. print("Invalid redirect:", redirect, "- invalid URL: should start with slash!")
  232. continue
  233. if redirect[0] in sources:
  234. print("Invalid redirect:", redirect,
  235. "- collision, source", redirect[0], "already has redirect:",
  236. sources[redirect[0]])
  237. continue
  238. redirect_id = id(redirect[0], redirect[1])
  239. if redirect_id in added:
  240. # Duplicate; skip.
  241. continue
  242. added[redirect_id] = True
  243. sources[redirect[0]] = redirect
  244. redirects.append(redirect)
  245. redirects.sort(key=redirect_to_str)
  246. with open(args.file, "w", encoding="utf-8", newline="") as f:
  247. writer = csv.writer(f)
  248. writer.writerows([["source", "destination"]])
  249. writer.writerows(redirects)
  250. existing_ids = {}
  251. for e in existing:
  252. existing_ids[id(e[0], e[1])] = True
  253. if not args.dump:
  254. print("Creating redirects.")
  255. for redirect in redirects:
  256. if not id(redirect[0], redirect[1]) in existing_ids:
  257. make_redirect(redirect[0], redirect[1], args)
  258. if not args.dry_run:
  259. sleep()
  260. print("Finished creating", len(redirects), "redirects.")
  261. if args.dry_run:
  262. print("THIS WAS A DRY RUN, NOTHING WAS SUBMITTED TO READTHEDOCS!")
  263. if __name__ == "__main__":
  264. main()