소스 검색

Update `ucaps.h` to contain proper case matchings

Jakub Marcowski 1 년 전
부모
커밋
104857687c
2개의 파일이 변경되었으며, 976줄이 추가되고 43줄이 삭제되었습니다
  1. 856 43
      core/string/ucaps.h
  2. 120 0
      misc/scripts/ucaps_fetch.py

파일이 너무 커서 변경 내용을 표시하지 않습니다.
+ 856 - 43
core/string/ucaps.h


+ 120 - 0
misc/scripts/ucaps_fetch.py

@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+
+# Script used to dump case mappings from
+# the Unicode Character Database to the `ucaps.h` file.
+# NOTE: This script is deliberately not integrated into the build system;
+# you should run it manually whenever you want to update the data.
+
+import os
+import sys
+from typing import Final, List, Tuple
+from urllib.request import urlopen
+
+# When executed as a script, add the directory two levels above this file to the
+# module search path so that the `methods` import below can be resolved
+# (presumably the repository root; verify against the project layout).
+if __name__ == "__main__":
+    sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
+
+from methods import generate_copyright_header
+
+# Location of the data file to download; the Unicode version is pinned in the path.
+URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/UnicodeData.txt"
+
+
+# Case-mapping tables filled by `parse_unicode_data()`. Each entry is a pair of
+# `0x`-prefixed hexadecimal strings: (code point, mapped code point).
+lower_to_upper: List[Tuple[str, str]] = []
+upper_to_lower: List[Tuple[str, str]] = []
+
+
+def parse_unicode_data() -> None:
+    """Download `UnicodeData.txt` and fill the global case-mapping tables.
+
+    Every record is a `;`-separated line: field 0 is the code point, field 12
+    the uppercase mapping and field 13 the lowercase mapping, all written as
+    hexadecimal without a prefix. For each record with a non-empty mapping, a
+    pair of `0x`-prefixed strings is appended to `lower_to_upper` or
+    `upper_to_lower`, in the order the records appear in the file.
+    """
+    # Start from empty tables so that calling this function more than once
+    # does not duplicate entries (and thereby corrupt the generated lengths).
+    lower_to_upper.clear()
+    upper_to_lower.clear()
+
+    # Use the response as a context manager so the connection is always closed,
+    # and stream it line by line instead of materializing the whole file.
+    with urlopen(URL) as response:
+        for raw_line in response:
+            split_line: List[str] = raw_line.decode("utf-8").split(";")
+
+            # Skip blank or truncated records instead of raising `IndexError`.
+            if len(split_line) < 14:
+                continue
+
+            code_value: str = split_line[0].strip()
+            uppercase_mapping: str = split_line[12].strip()
+            lowercase_mapping: str = split_line[13].strip()
+
+            if uppercase_mapping:
+                lower_to_upper.append((f"0x{code_value}", f"0x{uppercase_mapping}"))
+            if lowercase_mapping:
+                upper_to_lower.append((f"0x{code_value}", f"0x{lowercase_mapping}"))
+
+
+def make_cap_table(table_name: str, len_name: str, table: List[Tuple[str, str]]) -> str:
+    """Render `table` as the source text of a C array of value pairs.
+
+    The returned text declares `static const int <table_name>[<len_name>][2]`,
+    contains one tab-indented `{ first, second },` row per entry of `table`,
+    and ends with the closing `};` followed by an empty line.
+    """
+    header: str = f"static const int {table_name}[{len_name}][2] = {{\n"
+    rows: List[str] = [f"\t{{ {source}, {target} }},\n" for source, target in table]
+    footer: str = "};\n\n"
+
+    return header + "".join(rows) + footer
+
+
+def generate_ucaps_fetch() -> None:
+    """Regenerate `core/string/ucaps.h` from the downloaded case mappings.
+
+    Parses the Unicode data, then writes a header made of the copyright banner,
+    the `LTU_LEN` / `UTL_LEN` size macros, the `caps_table` and
+    `reverse_caps_table` arrays, and the `_find_upper` / `_find_lower` lookup
+    functions. The output path is resolved relative to this script's location.
+    """
+    parse_unicode_data()
+
+    source: str = generate_copyright_header("ucaps.h")
+
+    source += f"""
+#ifndef UCAPS_H
+#define UCAPS_H
+
+// This file was generated using the `misc/scripts/ucaps_fetch.py` script.
+
+#define LTU_LEN {len(lower_to_upper)}
+#define UTL_LEN {len(upper_to_lower)}\n\n"""
+
+    source += make_cap_table("caps_table", "LTU_LEN", lower_to_upper)
+    source += make_cap_table("reverse_caps_table", "UTL_LEN", upper_to_lower)
+
+    # The emitted lookups binary-search on the first column, so both tables must
+    # be sorted by code point. NOTE(review): this presumably relies on the record
+    # order of `UnicodeData.txt`; confirm if the data source ever changes.
+    source += """static int _find_upper(int ch) {
+\tint low = 0;
+\tint high = LTU_LEN - 1;
+\tint middle;
+
+\twhile (low <= high) {
+\t\tmiddle = (low + high) / 2;
+
+\t\tif (ch < caps_table[middle][0]) {
+\t\t\thigh = middle - 1; // Search low end of array.
+\t\t} else if (caps_table[middle][0] < ch) {
+\t\t\tlow = middle + 1; // Search high end of array.
+\t\t} else {
+\t\t\treturn caps_table[middle][1];
+\t\t}
+\t}
+
+\treturn ch;
+}
+
+static int _find_lower(int ch) {
+\tint low = 0;
+\tint high = UTL_LEN - 1;
+\tint middle;
+
+\twhile (low <= high) {
+\t\tmiddle = (low + high) / 2;
+
+\t\tif (ch < reverse_caps_table[middle][0]) {
+\t\t\thigh = middle - 1; // Search low end of array.
+\t\t} else if (reverse_caps_table[middle][0] < ch) {
+\t\t\tlow = middle + 1; // Search high end of array.
+\t\t} else {
+\t\t\treturn reverse_caps_table[middle][1];
+\t\t}
+\t}
+
+\treturn ch;
+}
+
+#endif // UCAPS_H
+"""
+
+    ucaps_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/ucaps.h")
+    # Pin both the encoding and the line endings so the generated header is
+    # identical regardless of the platform the script is run on.
+    with open(ucaps_path, "w", encoding="utf-8", newline="\n") as f:
+        f.write(source)
+
+    print("`ucaps.h` generated successfully.")
+
+
+# Regenerate the header only when this file is executed directly, not on import.
+if __name__ == "__main__":
+    generate_ucaps_fetch()

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.