Browse Source

Merge pull request #5414 from NathanLovato/gdquest/readthedocs_redirect_scripts

Code scripts to find renamed files in the docs and create redirects
Max Hilbrunner 3 years ago
parent
commit
66cc39c7b1
3 changed files with 209 additions and 0 deletions
  1. 95 0
      _tools/convert_git_renames_to_csv.py
  2. 112 0
      _tools/create_redirects.py
  3. 2 0
      _tools/requirements.txt

+ 95 - 0
_tools/convert_git_renames_to_csv.py

@@ -0,0 +1,95 @@
+"""Uses git to list files that were renamed between two revisions and converts
+that to a CSV table.
+
+Use it to prepare and double-check data for create_redirects.py.
+"""
+
+import subprocess
+import argparse
+import csv
+import sys
+
+try:
+    subprocess.check_output(["git", "--version"])
+except subprocess.CalledProcessError:
+    print("Git not found. It's required to run this program.")
+
+
+def parse_command_line_args():
+    parser = argparse.ArgumentParser(
+        description="Uses git to list files that were renamed between two revisions and "
+        "converts that to a CSV table. Use it to prepare and double-check data for create_redirects.py."
+    )
+    parser.add_argument(
+        "revision1",
+        type=str,
+        help="Start revision to get renamed files from.",
+    )
+    parser.add_argument(
+        "revision2",
+        type=str,
+        help="End revision to get renamed files from.",
+    )
+    parser.add_argument("-f", "--output-file", type=str, help="Path to the output file")
+    return parser.parse_args()
+
+
+def main():
+    args = parse_command_line_args()
+    assert args.revision1 != args.revision2, "Revisions must be different."
+    for revision in [args.revision1, args.revision2]:
+        assert not "/" in revision, "Revisions must be local branches only."
+
+    # Ensure that both revisions are present in the local repository.
+    for revision in [args.revision1, args.revision2]:
+        try:
+            subprocess.check_output(
+                ["git", "rev-list", f"HEAD..{revision}"], stderr=subprocess.STDOUT
+            )
+        except subprocess.CalledProcessError:
+            print(
+                f"Revision {revision} not found in this repository. "
+                "Please make sure that both revisions exist locally in your git repository."
+            )
+            exit(1)
+
+    # Get the list of renamed files between the two revisions.
+    renamed_files = (
+        subprocess.check_output(
+            [
+                "git",
+                "diff",
+                "--name-status",
+                "--diff-filter=R",
+                args.revision1,
+                args.revision2,
+            ]
+        )
+        .decode("utf-8")
+        .split("\n")
+    )
+    renamed_documents = [f for f in renamed_files if f.endswith(".rst")]
+
+    csv_data: list[dict] = []
+    branch = args.revision2
+    for document in renamed_documents:
+        _, source, destination = document.split("\t")
+        csv_data.append(
+            {"source": source, "destination": destination, "branch": branch}
+        )
+
+    if args.output_file:
+        with open(args.output_file, "w") as f:
+            writer = csv.DictWriter(f, fieldnames=csv_data[0].keys()).writerows(
+                csv_data
+            )
+            writer.writeheader()
+            writer.writerows(csv_data)
+    else:
+        writer = csv.DictWriter(sys.stdout, fieldnames=csv_data[0].keys())
+        writer.writeheader()
+        writer.writerows(csv_data)
+
+
+if __name__ == "__main__":
+    main()

+ 112 - 0
_tools/create_redirects.py

@@ -0,0 +1,112 @@
+"""Create page redirects for a specific branch of the docs.
+
+Loads data from a CSV file with three columns: source, destination, branch
+
+Where the source and destination are paths to RST files in the repository.
+
+Pre-requisites:
+
+- You need the dotenv Python module installed. We use this to let you store your
+  API auth token privately.
+  
+  You can install it by running: pip3 install -r requirements.txt
+
+How to use:
+
+- Generate a CSV file from two git revisions using convert_git_renames_to_csv.py
+- Store your API token in a .env file in this directory like so:
+  RTD_AUTH_TOKEN=your_token_here
+- Run this script, passing it the path to your generated CSV file as an
+  argument.
+
+The script directly creates redirects using the CSV data. It does not check if a
+redirect already exists or if it's correct.
+"""
+
+import argparse
+import csv
+import json
+import os
+
+import dotenv
+from requests.models import default_hooks
+
+try:
+    import requests
+except ImportError:
+    print(
+        "Required third-party module `requests` not found. "
+        "Please install it with `pip install requests` (or `pip3 install requests` on Linux)."
+    )
+
+
+dotenv.load_dotenv()
+RTD_AUTH_TOKEN: str = os.environ.get("RTD_AUTH_TOKEN", "")
+if RTD_AUTH_TOKEN == "":
+    print("Missing auth token in .env file or .env file not found. Aborting.")
+    exit(1)
+
+REDIRECT_URL = "https://readthedocs.org/api/v3/projects/pip/redirects/"
+REQUEST_HEADERS = {"Authorization": f"token {RTD_AUTH_TOKEN}"}
+
+
+def parse_command_line_args():
+    parser = argparse.ArgumentParser(
+        description="Create page redirects for a specific branch of the docs."
+    )
+    parser.add_argument(
+        "csv_file",
+        type=str,
+        help="Path to a CSV file with three columns: source, destination, branch.",
+    )
+    # add dry-run argument
+    parser.add_argument(
+        "-d",
+        "--dry-run",
+        action="store_true",
+        help="Run the program and output information without side effects.",
+    )
+    return parser.parse_args()
+
+
+def make_redirect(source, destination, branch, args):
+    # Currently, the program only works for the EN version of the docs
+    trimmed_source = source.replace(".rst", "")
+    trimmed_destination = destination.replace(".rst", "")
+
+    source_slug = f"/en/{branch}/{trimmed_source}"
+    destination_slug = f"/en/{branch}/{trimmed_destination}"
+    json_data = {"from_url": source_slug, "to_url": destination_slug, "type": "page"}
+    if args.dry_run:
+        print(f"{source_slug} -> {destination_slug}")
+    else:
+        response = requests.post(
+            REDIRECT_URL,
+            json=json.dumps(json_data),
+            headers=REQUEST_HEADERS,
+        )
+        if response.status_code == 201:
+            print(f"Created redirect {source_slug} -> {destination_slug}")
+        else:
+            print(
+                f"Failed to create redirect {source_slug} -> {destination_slug}. "
+                f"Status code: {response.status_code}"
+            )
+
+
+def main():
+    args = parse_command_line_args()
+    redirect_data = []
+    with open(args.csv_file, "r") as f:
+        redirect_data = list(csv.DictReader(f))
+    assert redirect_data[0].keys() == {
+        "source",
+        "destination",
+        "branch",
+    }, "CSV file must have those three columns: source, destination, branch."
+    for row in redirect_data:
+        make_redirect(row["source"], row["destination"], row["branch"], args)
+
+
+if __name__ == "__main__":
+    main()

+ 2 - 0
_tools/requirements.txt

@@ -0,0 +1,2 @@
+python-dotenv==0.18.0
+requests==2.20.0