Browse Source

Fixed diacritic-related mappings in charset tables (#4083)

Nick Sergeev 1 month ago
parent
commit
e49ee2dbd8
4 changed files with 58 additions and 338 deletions
  1. 20 112
      src/charsets/cjk.txt
  2. 19 113
      src/charsets/cont.txt
  3. 17 112
      src/charsets/japanese.txt
  4. 2 1
      src/charsets/korean.txt

+ 20 - 112
src/charsets/cjk.txt

@@ -225,53 +225,10 @@ U+3047,
 U+3048->U+3047,
 U+3049,
 U+304A->U+3049,
-U+304B,
-U+304C->U+304B,
-U+304D,
-U+304E->U+304D,
-U+304F,
-U+3050->U+304F,
-U+3051,
-U+3052->U+3051,
-U+3053,
-U+3054->U+3053,
-U+3055,
-U+3056->U+3055,
-U+3057,
-U+3058->U+3057,
-U+3059,
-U+305A->U+3059,
-U+305B,
-U+305C->U+305B,
-U+305D,
-U+305E->U+305D,
-U+305F,
-U+3060->U+305F,
-U+3061,
-U+3062->U+3061,
-U+3063,
+U+304B..U+3063,
 U+3064->U+3063,
 U+3065->U+3063,
-U+3066,
-U+3067->U+3066,
-U+3068,
-U+3069->U+3068,
-U+306A..U+306F,
-U+3070->U+306F,
-U+3071->U+306F,
-U+3072,
-U+3073->U+3072,
-U+3074->U+3072,
-U+3075,
-U+3076->U+3075,
-U+3077->U+3075,
-U+3078,
-U+3079->U+3078,
-U+307A->U+3078,
-U+307B,
-U+307C->U+307B,
-U+307D->U+307B,
-U+307E..U+3083,
+U+3066..U+3083,
 U+3084->U+3083,
 U+3085,
 U+3086->U+3085,
@@ -280,7 +237,7 @@ U+3088->U+3087,
 U+3089..U+308E,
 U+308F->U+308E,
 U+3090..U+3093,
-U+3094->U+3046,
+U+3094,
 U+3095->U+304B,
 U+3096->U+3051,
 U+30A1,
@@ -294,62 +251,31 @@ U+30A8->U+30A7,
 U+30A9,
 U+30AA->U+30A9,
 U+30AB->U+30F5,
-U+30AC->U+30AB,
-U+30AD,
+U+30AC..U+30AD,
 U+30AE->U+30AD,
-U+30AF,
-U+30B0->U+30AF,
+U+30AF..U+30B0,
 U+30B1->U+30F6,
-U+30B2->U+30B1,
-U+30B3,
-U+30B4->U+30B3,
-U+30B5,
-U+30B6->U+30B5,
+U+30B2..U+30B6,
 U+30B7->U+31F1,
-U+30B8->U+30B7,
+U+30B8,
 U+30B9->U+31F2,
-U+30BA->U+30B9,
-U+30BB,
-U+30BC->U+30BB,
-U+30BD,
-U+30BE->U+30BD,
-U+30BF,
-U+30C0->U+30BF,
-U+30C1,
-U+30C2->U+30C1,
-U+30C3,
-U+30C4,
-U+30C5->U+30C4,
-U+30C6,
-U+30C7->U+30C6,
+U+30BA..U+30C7,
 U+30C8->U+31F3,
-U+30C9->U+30C8,
-U+30CA,
-U+30CB,
+U+30C9..U+30CB,
 U+30CC->U+31F4,
-U+30CD,
-U+30CE,
+U+30CD..U+30CE,
 U+30CF->U+31F5,
-U+30D0->U+30CF,
-U+30D1->U+30CF,
+U+30D0..U+30D1,
 U+30D2->U+31F6,
-U+30D3->U+30D2,
-U+30D4->U+30D2,
+U+30D3..U+30D4,
 U+30D5->U+31F7,
-U+30D6->U+30D5,
-U+30D7->U+30D5,
+U+30D6..U+30D7,
 U+30D8->U+31F8,
-U+30D9->U+30D8,
-U+30DA->U+30D8,
+U+30D9..U+30DA,
 U+30DB->U+31F9,
-U+30DC->U+30DB,
-U+30DD->U+30DB,
-U+30DE,
-U+30DF,
+U+30DC..U+30DF,
 U+30E0->U+31FA,
-U+30E1,
-U+30E2,
-U+30E3,
+U+30E1..U+30E3,
 U+30E4->U+30E3,
 U+30E5,
 U+30E6->U+30E5,
@@ -362,11 +288,7 @@ U+30EC->U+31FE,
 U+30ED->U+31FF,
 U+30EE,
 U+30EF->U+30EE,
-U+30F0..U+30F3,
-U+30F4->U+30A6,
-U+30F5,
-U+30F6,
-U+30F7..U+30FA->U+30EF..U+30F2,
+U+30F0..U+30FA,
 U+3105..U+312C,
 U+3131..U+3132->U+1100..U+1101,
 U+3133->U+11AA,
@@ -400,7 +322,8 @@ U+317D->U+1132,
 U+317E->U+1136,
 U+317F->U+1140,
 U+3180->U+1147,
-U+3181..U+3183->U+11F0..U+11F2,
+U+3181->U+114C,
+U+3182..U+3183->U+11F1..U+11F2,
 U+3184..U+3186->U+1157..U+1159,
 U+3187..U+3188->U+1184..U+1185,
 U+3189->U+1188,
@@ -409,22 +332,7 @@ U+318C->U+1194,
 U+318D->U+119E,
 U+318E->U+11A1,
 U+31A0..U+31B7,
-U+31F0,
-U+31F1,
-U+31F2,
-U+31F3,
-U+31F4,
-U+31F5,
-U+31F6,
-U+31F7,
-U+31F8,
-U+31F9,
-U+31FA,
-U+31FB,
-U+31FC,
-U+31FD,
-U+31FE,
-U+31FF,
+U+31F0..U+31FF,
 U+3400..U+4DB5,
 U+4E00..U+9FBB,
 U+A000..U+A48C,

+ 19 - 113
src/charsets/cont.txt

@@ -230,53 +230,10 @@ U+3047,
 U+3048->U+3047,
 U+3049,
 U+304A->U+3049,
-U+304B,
-U+304C->U+304B,
-U+304D,
-U+304E->U+304D,
-U+304F,
-U+3050->U+304F,
-U+3051,
-U+3052->U+3051,
-U+3053,
-U+3054->U+3053,
-U+3055,
-U+3056->U+3055,
-U+3057,
-U+3058->U+3057,
-U+3059,
-U+305A->U+3059,
-U+305B,
-U+305C->U+305B,
-U+305D,
-U+305E->U+305D,
-U+305F,
-U+3060->U+305F,
-U+3061,
-U+3062->U+3061,
-U+3063,
+U+304B..U+3063,
 U+3064->U+3063,
 U+3065->U+3063,
-U+3066,
-U+3067->U+3066,
-U+3068,
-U+3069->U+3068,
-U+306A..U+306F,
-U+3070->U+306F,
-U+3071->U+306F,
-U+3072,
-U+3073->U+3072,
-U+3074->U+3072,
-U+3075,
-U+3076->U+3075,
-U+3077->U+3075,
-U+3078,
-U+3079->U+3078,
-U+307A->U+3078,
-U+307B,
-U+307C->U+307B,
-U+307D->U+307B,
-U+307E..U+3083,
+U+3066..U+3083,
 U+3084->U+3083,
 U+3085,
 U+3086->U+3085,
@@ -285,7 +242,7 @@ U+3088->U+3087,
 U+3089..U+308E,
 U+308F->U+308E,
 U+3090..U+3093,
-U+3094->U+3046,
+U+3094,
 U+3095->U+304B,
 U+3096->U+3051,
 U+30A1,
@@ -299,62 +256,29 @@ U+30A8->U+30A7,
 U+30A9,
 U+30AA->U+30A9,
 U+30AB->U+30F5,
-U+30AC->U+30AB,
-U+30AD,
-U+30AE->U+30AD,
-U+30AF,
-U+30B0->U+30AF,
+U+30AC..U+30B0,
 U+30B1->U+30F6,
-U+30B2->U+30B1,
-U+30B3,
-U+30B4->U+30B3,
-U+30B5,
-U+30B6->U+30B5,
+U+30B2..U+30B6,
 U+30B7->U+31F1,
-U+30B8->U+30B7,
+U+30B8,
 U+30B9->U+31F2,
-U+30BA->U+30B9,
-U+30BB,
-U+30BC->U+30BB,
-U+30BD,
-U+30BE->U+30BD,
-U+30BF,
-U+30C0->U+30BF,
-U+30C1,
-U+30C2->U+30C1,
-U+30C3,
-U+30C4,
-U+30C5->U+30C4,
-U+30C6,
-U+30C7->U+30C6,
+U+30BA..U+30C7,
 U+30C8->U+31F3,
-U+30C9->U+30C8,
-U+30CA,
-U+30CB,
+U+30C9..U+30CB,
 U+30CC->U+31F4,
-U+30CD,
-U+30CE,
+U+30CD..U+30CE,
 U+30CF->U+31F5,
-U+30D0->U+30CF,
-U+30D1->U+30CF,
+U+30D0..U+30D1,
 U+30D2->U+31F6,
-U+30D3->U+30D2,
-U+30D4->U+30D2,
+U+30D3..U+30D4,
 U+30D5->U+31F7,
-U+30D6->U+30D5,
-U+30D7->U+30D5,
+U+30D6..U+30D7,
 U+30D8->U+31F8,
-U+30D9->U+30D8,
-U+30DA->U+30D8,
+U+30D9..U+30DA,
 U+30DB->U+31F9,
-U+30DC->U+30DB,
-U+30DD->U+30DB,
-U+30DE,
-U+30DF,
+U+30DC..U+30DF,
 U+30E0->U+31FA,
-U+30E1,
-U+30E2,
-U+30E3,
+U+30E1..U+30E3,
 U+30E4->U+30E3,
 U+30E5,
 U+30E6->U+30E5,
@@ -367,11 +291,7 @@ U+30EC->U+31FE,
 U+30ED->U+31FF,
 U+30EE,
 U+30EF->U+30EE,
-U+30F0..U+30F3,
-U+30F4->U+30A6,
-U+30F5,
-U+30F6,
-U+30F7..U+30FA->U+30EF..U+30F2,
+U+30F0..U+30FA,
 U+3105..U+312C,
 U+3131..U+3132->U+1100..U+1101,
 U+3133->U+11AA,
@@ -405,7 +325,8 @@ U+317D->U+1132,
 U+317E->U+1136,
 U+317F->U+1140,
 U+3180->U+1147,
-U+3181..U+3183->U+11F0..U+11F2,
+U+3181->U+114C,
+U+3182..U+3183->U+11F1..U+11F2,
 U+3184..U+3186->U+1157..U+1159,
 U+3187..U+3188->U+1184..U+1185,
 U+3189->U+1188,
@@ -414,22 +335,7 @@ U+318C->U+1194,
 U+318D->U+119E,
 U+318E->U+11A1,
 U+31A0..U+31B7,
-U+31F0,
-U+31F1,
-U+31F2,
-U+31F3,
-U+31F4,
-U+31F5,
-U+31F6,
-U+31F7,
-U+31F8,
-U+31F9,
-U+31FA,
-U+31FB,
-U+31FC,
-U+31FD,
-U+31FE,
-U+31FF,
+U+31F0..U+31FF,
 U+3400..U+4DB5,
 U+4E00..U+9FBB,
 U+A000..U+A48C,

+ 17 - 112
src/charsets/japanese.txt

@@ -222,53 +222,10 @@ U+3047,
 U+3048->U+3047,
 U+3049,
 U+304A->U+3049,
-U+304B,
-U+304C->U+304B,
-U+304D,
-U+304E->U+304D,
-U+304F,
-U+3050->U+304F,
-U+3051,
-U+3052->U+3051,
-U+3053,
-U+3054->U+3053,
-U+3055,
-U+3056->U+3055,
-U+3057,
-U+3058->U+3057,
-U+3059,
-U+305A->U+3059,
-U+305B,
-U+305C->U+305B,
-U+305D,
-U+305E->U+305D,
-U+305F,
-U+3060->U+305F,
-U+3061,
-U+3062->U+3061,
-U+3063,
+U+304B..U+3063,
 U+3064->U+3063,
 U+3065->U+3063,
-U+3066,
-U+3067->U+3066,
-U+3068,
-U+3069->U+3068,
-U+306A..U+306F,
-U+3070->U+306F,
-U+3071->U+306F,
-U+3072,
-U+3073->U+3072,
-U+3074->U+3072,
-U+3075,
-U+3076->U+3075,
-U+3077->U+3075,
-U+3078,
-U+3079->U+3078,
-U+307A->U+3078,
-U+307B,
-U+307C->U+307B,
-U+307D->U+307B,
-U+307E..U+3083,
+U+3066..U+3083,
 U+3084->U+3083,
 U+3085,
 U+3086->U+3085,
@@ -277,7 +234,7 @@ U+3088->U+3087,
 U+3089..U+308E,
 U+308F->U+308E,
 U+3090..U+3093,
-U+3094->U+3046,
+U+3094,
 U+3095->U+304B,
 U+3096->U+3051,
 U+30A1,
@@ -291,62 +248,29 @@ U+30A8->U+30A7,
 U+30A9,
 U+30AA->U+30A9,
 U+30AB->U+30F5,
-U+30AC->U+30AB,
-U+30AD,
-U+30AE->U+30AD,
-U+30AF,
-U+30B0->U+30AF,
+U+30AC..U+30B0,
 U+30B1->U+30F6,
-U+30B2->U+30B1,
-U+30B3,
-U+30B4->U+30B3,
-U+30B5,
-U+30B6->U+30B5,
+U+30B2..U+30B6,
 U+30B7->U+31F1,
-U+30B8->U+30B7,
+U+30B8,
 U+30B9->U+31F2,
-U+30BA->U+30B9,
-U+30BB,
-U+30BC->U+30BB,
-U+30BD,
-U+30BE->U+30BD,
-U+30BF,
-U+30C0->U+30BF,
-U+30C1,
-U+30C2->U+30C1,
-U+30C3,
-U+30C4,
-U+30C5->U+30C4,
-U+30C6,
-U+30C7->U+30C6,
+U+30BA..U+30C7,
 U+30C8->U+31F3,
-U+30C9->U+30C8,
-U+30CA,
-U+30CB,
+U+30C9..U+30CB,
 U+30CC->U+31F4,
-U+30CD,
-U+30CE,
+U+30CD..U+30CE,
 U+30CF->U+31F5,
-U+30D0->U+30CF,
-U+30D1->U+30CF,
+U+30D0..U+30D1,
 U+30D2->U+31F6,
-U+30D3->U+30D2,
-U+30D4->U+30D2,
+U+30D3..U+30D4,
 U+30D5->U+31F7,
-U+30D6->U+30D5,
-U+30D7->U+30D5,
+U+30D6..U+30D7,
 U+30D8->U+31F8,
-U+30D9->U+30D8,
-U+30DA->U+30D8,
+U+30D9..U+30DA,
 U+30DB->U+31F9,
-U+30DC->U+30DB,
-U+30DD->U+30DB,
-U+30DE,
-U+30DF,
+U+30DC..U+30DF,
 U+30E0->U+31FA,
-U+30E1,
-U+30E2,
-U+30E3,
+U+30E1..U+30E3,
 U+30E4->U+30E3,
 U+30E5,
 U+30E6->U+30E5,
@@ -359,26 +283,7 @@ U+30EC->U+31FE,
 U+30ED->U+31FF,
 U+30EE,
 U+30EF->U+30EE,
-U+30F0..U+30F3,
-U+30F4->U+30A6,
-U+30F5,
-U+30F6,
-U+30F7..U+30FA->U+30EF..U+30F2,
+U+30F0..U+30FA,
 U+31A0..U+31B7,
-U+31F0,
-U+31F1,
-U+31F2,
-U+31F3,
-U+31F4,
-U+31F5,
-U+31F6,
-U+31F7,
-U+31F8,
-U+31F9,
-U+31FA,
-U+31FB,
-U+31FC,
-U+31FD,
-U+31FE,
-U+31FF,
+U+31F0..U+31FF,
 U+20000..U+2A6D6,

+ 2 - 1
src/charsets/korean.txt

@@ -247,7 +247,8 @@ U+317D->U+1132,
 U+317E->U+1136,
 U+317F->U+1140,
 U+3180->U+1147,
-U+3181..U+3183->U+11F0..U+11F2,
+U+3181->U+114C,
+U+3182..U+3183->U+11F1..U+11F2,
 U+3184..U+3186->U+1157..U+1159,
 U+3187..U+3188->U+1184..U+1185,
 U+3189->U+1188,