Browse Source

* synchronized with trunk

git-svn-id: branches/unicodekvm@48724 -
nickysn 4 years ago
parent
commit
58867e0a4f

+ 4 - 0
.gitattributes

@@ -9098,6 +9098,8 @@ packages/rtl-unicode/src/inc/cp949.pas svneol=native#text/pascal
 packages/rtl-unicode/src/inc/cp950.pas svneol=native#text/pascal
 packages/rtl-unicode/src/inc/cp950.pas svneol=native#text/pascal
 packages/rtl-unicode/src/inc/cpbuildu.pp svneol=native#text/plain
 packages/rtl-unicode/src/inc/cpbuildu.pp svneol=native#text/plain
 packages/rtl-unicode/src/inc/freebidi.pp svneol=native#text/plain
 packages/rtl-unicode/src/inc/freebidi.pp svneol=native#text/plain
+packages/rtl-unicode/src/inc/graphemebreakproperty.pp svneol=native#text/plain
+packages/rtl-unicode/src/inc/graphemebreakproperty_code.inc svneol=native#text/plain
 packages/rtl-unicode/src/inc/ucadata.inc svneol=native#text/pascal
 packages/rtl-unicode/src/inc/ucadata.inc svneol=native#text/pascal
 packages/rtl-unicode/src/inc/ucadata_be.inc svneol=native#text/pascal
 packages/rtl-unicode/src/inc/ucadata_be.inc svneol=native#text/pascal
 packages/rtl-unicode/src/inc/ucadata_le.inc svneol=native#text/pascal
 packages/rtl-unicode/src/inc/ucadata_le.inc svneol=native#text/pascal
@@ -19835,6 +19837,8 @@ utils/unicode/cldrtxt.pas svneol=native#text/plain
 utils/unicode/cldrxml.pas svneol=native#text/pascal
 utils/unicode/cldrxml.pas svneol=native#text/pascal
 utils/unicode/data/readme.txt svneol=native#text/plain
 utils/unicode/data/readme.txt svneol=native#text/plain
 utils/unicode/fpmake.pp svneol=native#text/plain
 utils/unicode/fpmake.pp svneol=native#text/plain
+utils/unicode/gbpparser.lpi svneol=native#text/plain
+utils/unicode/gbpparser.lpr svneol=native#text/pascal
 utils/unicode/grbtree.pas svneol=native#text/pascal
 utils/unicode/grbtree.pas svneol=native#text/pascal
 utils/unicode/helper.pas svneol=native#text/pascal
 utils/unicode/helper.pas svneol=native#text/pascal
 utils/unicode/parse-collations.bat svneol=native#text/plain
 utils/unicode/parse-collations.bat svneol=native#text/plain

+ 4 - 0
compiler/aarch64/cgcpu.pas

@@ -1797,12 +1797,16 @@ implementation
             reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
             reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
             ref.addressmode:=AM_PREINDEXED;
             ref.addressmode:=AM_PREINDEXED;
             list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
             list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+            current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
+            current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
+            current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
             if target_info.system=system_aarch64_win64 then
             if target_info.system=system_aarch64_win64 then
               list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
               list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
             { initialise frame pointer }
             { initialise frame pointer }
             if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
             if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
               begin
               begin
                 a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
                 a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+                current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
                 if target_info.system=system_aarch64_win64 then
                 if target_info.system=system_aarch64_win64 then
                   list.concat(cai_seh_directive.create(ash_setfp));
                   list.concat(cai_seh_directive.create(ash_setfp));
               end
               end

+ 13 - 0
packages/rtl-objpas/src/inc/dateutil.inc

@@ -44,6 +44,19 @@ const
   DaySaturday  = 6;
   DaySaturday  = 6;
   DaySunday    = 7;
   DaySunday    = 7;
 
 
+  MonthJanuary   = 1;
+  MonthFebruary  = 2;
+  MonthMarch     = 3;
+  MonthApril     = 4;
+  MonthMay       = 5;
+  MonthJune      = 6;
+  MonthJuly      = 7;
+  MonthAugust    = 8;
+  MonthSeptember = 9;
+  MonthOctober   = 10;
+  MonthNovember  = 11;
+  MonthDecember  = 12;
+
   // Fraction of a day
   // Fraction of a day
   OneHour        = TDateTime(1)/HoursPerDay;
   OneHour        = TDateTime(1)/HoursPerDay;
   OneMinute      = TDateTime(1)/MinsPerDay;
   OneMinute      = TDateTime(1)/MinsPerDay;

+ 8 - 1
packages/rtl-unicode/fpmake.pp

@@ -16,11 +16,12 @@ Const
   CPUnits       = [aix,amiga,aros,android,beos,darwin,iphonesim,ios,emx,gba,nds,freebsd,go32v2,haiku,linux,morphos,netbsd,netware,netwlibc,openbsd,os2,solaris,watcom,wii,win32,win64,wince,dragonfly,freertos];
   CPUnits       = [aix,amiga,aros,android,beos,darwin,iphonesim,ios,emx,gba,nds,freebsd,go32v2,haiku,linux,morphos,netbsd,netware,netwlibc,openbsd,os2,solaris,watcom,wii,win32,win64,wince,dragonfly,freertos];
   utf8bidiOSes  = [netware,netwlibc];
   utf8bidiOSes  = [netware,netwlibc];
   freebidiOSes  = [netware,netwlibc];
   freebidiOSes  = [netware,netwlibc];
+  GraphemeBreakPropertyOSes = AllOSes;
 
 
 // Character not movable because fpwidestring depends on it.
 // Character not movable because fpwidestring depends on it.
 //  CharacterOSes = [android,darwin,freebsd,linux,netbsd,openbsd,solaris,win32,win64,dragonfly];
 //  CharacterOSes = [android,darwin,freebsd,linux,netbsd,openbsd,solaris,win32,win64,dragonfly];
 
 
-  UnicodeAllOSes =   CollationOSes + utf8bidiOSes + freebidiOSes + CPUnits;
+  UnicodeAllOSes =   CollationOSes + utf8bidiOSes + freebidiOSes + CPUnits + GraphemeBreakPropertyOSes;
 
 
 // Amiga has a crt in its RTL dir, but it is commented in the makefile
 // Amiga has a crt in its RTL dir, but it is commented in the makefile
 
 
@@ -138,6 +139,12 @@ begin
     T:=P.Targets.AddImplicitUnit('cp950.pas',CPUnits);
     T:=P.Targets.AddImplicitUnit('cp950.pas',CPUnits);
 
 
 //    T:=P.Targets.AddUnit('character.pp',characterOSes);
 //    T:=P.Targets.AddUnit('character.pp',characterOSes);
+
+    T:=P.Targets.AddUnit('graphemebreakproperty.pp',GraphemeBreakPropertyOSes);
+    with T.Dependencies do
+      begin
+        AddInclude('graphemebreakproperty_code.inc');
+      end;
   end
   end
 end;
 end;
 
 

+ 37 - 0
packages/rtl-unicode/src/inc/graphemebreakproperty.pp

@@ -0,0 +1,37 @@
+{ Lookup of the grapheme break property of a single code point.
+  The classification logic itself lives in graphemebreakproperty_code.inc,
+  which is generated by the gbpparser tool and must not be edited by hand. }
+unit graphemebreakproperty;
+
+{$MODE objfpc}
+
+interface
+
+type
+  { Grapheme break property values. gbpOther is the value returned for code
+    points that are not matched by any test in the generated include file.
+    NOTE: the identifier gpbRegional_Indicator is spelled with 'gpb' (not
+    'gbp'); the generated include file refers to it by exactly this name, so
+    the spelling must not be changed here alone. }
+  TGraphemeBreakProperty = (
+    gbpOther,
+    gbpPrepend,
+    gbpCR,
+    gbpLF,
+    gbpControl,
+    gbpExtend,
+    gpbRegional_Indicator,
+    gbpSpacingMark,
+    gbpL,
+    gbpV,
+    gbpT,
+    gbpLV,
+    gbpLVT,
+    gbpE_Base,
+    gbpE_Modifier,
+    gbpZWJ,
+    gbpGlue_After_Zwj,
+    gbpE_Base_GAZ);
+
+{ Returns the grapheme break property of the code point Ch. }
+function GetGraphemeBreakProperty(Ch: UCS4Char): TGraphemeBreakProperty;
+
+implementation
+
+function GetGraphemeBreakProperty(Ch: UCS4Char): TGraphemeBreakProperty;
+begin
+  { the include file is a single statement that reads Ch and assigns result }
+  {$I graphemebreakproperty_code.inc}
+end;
+
+end.

+ 511 - 0
packages/rtl-unicode/src/inc/graphemebreakproperty_code.inc

@@ -0,0 +1,511 @@
+{ do not edit, this file is autogenerated by the gbpparser tool }
+if Ch=13then result:=gbpCR else
+if Ch=10then result:=gbpLF else
+if Ch=8205then result:=gbpZWJ else
+if(Ch>=127462)and(Ch<=127487)then result:=gpbRegional_Indicator else
+if(Ch>=127995)and(Ch<=127999)then result:=gbpE_Modifier else
+if(Ch>=128102)and(Ch<=128105)then result:=gbpE_Base_GAZ else
+if(Ch>=44032)and(Ch<=55203)then begin if((Ch-44032)mod 28)=0then result:=gbpLV else result:=gbpLVT end else
+if((Ch>=4352)and(Ch<=4447))or
+((Ch>=43360)and(Ch<=43388))then result:=gbpL else
+if((Ch>=4448)and(Ch<=4519))or
+((Ch>=55216)and(Ch<=55238))then result:=gbpV else
+if((Ch>=4520)and(Ch<=4607))or
+((Ch>=55243)and(Ch<=55291))then result:=gbpT else
+if(Ch=10084)or
+(Ch=128139)or
+(Ch=128488)then result:=gbpGlue_After_Zwj else
+if((Ch>=1536)and(Ch<=1541))or
+(Ch=1757)or
+(Ch=1807)or
+(Ch=2274)or
+(Ch=3406)or
+(Ch=69821)or
+((Ch>=70082)and(Ch<=70083))then result:=gbpPrepend else
+if((Ch>=0)and(Ch<=31))or
+((Ch>=127)and(Ch<=159))or
+(Ch=173)or
+(Ch=1564)or
+(Ch=6158)or
+(Ch=8203)or
+((Ch>=8206)and(Ch<=8207))or
+((Ch>=8232)and(Ch<=8238))or
+((Ch>=8288)and(Ch<=8303))or
+((Ch>=55296)and(Ch<=57343))or
+(Ch=65279)or
+((Ch>=65520)and(Ch<=65531))or
+((Ch>=113824)and(Ch<=113827))or
+((Ch>=119155)and(Ch<=119162))or
+((Ch>=917504)and(Ch<=917535))or
+((Ch>=917632)and(Ch<=917759))or
+((Ch>=918000)and(Ch<=921599))then result:=gbpControl else
+if(Ch=9757)or
+(Ch=9977)or
+((Ch>=9994)and(Ch<=9997))or
+(Ch=127877)or
+((Ch>=127939)and(Ch<=127940))or
+((Ch>=127946)and(Ch<=127947))or
+((Ch>=128066)and(Ch<=128067))or
+((Ch>=128070)and(Ch<=128080))or
+(Ch=128110)or
+((Ch>=128112)and(Ch<=128120))or
+(Ch=128124)or
+((Ch>=128129)and(Ch<=128131))or
+((Ch>=128133)and(Ch<=128135))or
+(Ch=128170)or
+(Ch=128373)or
+(Ch=128378)or
+(Ch=128400)or
+((Ch>=128405)and(Ch<=128406))or
+((Ch>=128581)and(Ch<=128583))or
+((Ch>=128587)and(Ch<=128591))or
+(Ch=128675)or
+((Ch>=128692)and(Ch<=128694))or
+(Ch=128704)or
+((Ch>=129304)and(Ch<=129310))or
+(Ch=129318)or
+(Ch=129328)or
+((Ch>=129331)and(Ch<=129337))or
+((Ch>=129340)and(Ch<=129342))then result:=gbpE_Base else
+case Ch of
+768..879,
+1155..1161,
+1425..1469,
+1471,
+1473..1474,
+1476..1477,
+1479,
+1552..1562,
+1611..1631,
+1648,
+1750..1756,
+1759..1764,
+1767..1768,
+1770..1773,
+1809,
+1840..1866,
+1958..1968,
+2027..2035,
+2070..2073,
+2075..2083,
+2085..2087,
+2089..2093,
+2137..2139,
+2260..2306,
+2362,
+2364,
+2369..2376,
+2381,
+2385..2391,
+2402..2403,
+2433,
+2492,
+2494,
+2497..2500,
+2509,
+2519,
+2530..2531,
+2561..2562,
+2620,
+2625..2626,
+2631..2632,
+2635..2637,
+2641,
+2672..2673,
+2677,
+2689..2690,
+2748,
+2753..2757,
+2759..2760,
+2765,
+2786..2787,
+2817,
+2876,
+2878..2879,
+2881..2884,
+2893,
+2902..2903,
+2914..2915,
+2946,
+3006,
+3008,
+3021,
+3031,
+3072,
+3134..3136,
+3142..3144,
+3146..3149,
+3157..3158,
+3170..3171,
+3201,
+3260,
+3263,
+3266,
+3270,
+3276..3277,
+3285..3286,
+3298..3299,
+3329,
+3390,
+3393..3396,
+3405,
+3415,
+3426..3427,
+3530,
+3535,
+3538..3540,
+3542,
+3551,
+3633,
+3636..3642,
+3655..3662,
+3761,
+3764..3769,
+3771..3772,
+3784..3789,
+3864..3865,
+3893,
+3895,
+3897,
+3953..3966,
+3968..3972,
+3974..3975,
+3981..3991,
+3993..4028,
+4038,
+4141..4144,
+4146..4151,
+4153..4154,
+4157..4158,
+4184..4185,
+4190..4192,
+4209..4212,
+4226,
+4229..4230,
+4237,
+4253,
+4957..4959,
+5906..5908,
+5938..5940,
+5970..5971,
+6002..6003,
+6068..6069,
+6071..6077,
+6086,
+6089..6099,
+6109,
+6155..6157,
+6277..6278,
+6313,
+6432..6434,
+6439..6440,
+6450,
+6457..6459,
+6679..6680,
+6683,
+6742,
+6744..6750,
+6752,
+6754,
+6757..6764,
+6771..6780,
+6783,
+6832..6846,
+6912..6915,
+6964,
+6966..6970,
+6972,
+6978,
+7019..7027,
+7040..7041,
+7074..7077,
+7080..7081,
+7083..7085,
+7142,
+7144..7145,
+7149,
+7151..7153,
+7212..7219,
+7222..7223,
+7376..7378,
+7380..7392,
+7394..7400,
+7405,
+7412,
+7416..7417,
+7616..7669,
+7675..7679,
+8204,
+8400..8432,
+11503..11505,
+11647,
+11744..11775,
+12330..12335,
+12441..12442,
+42607..42610,
+42612..42621,
+42654..42655,
+42736..42737,
+43010,
+43014,
+43019,
+43045..43046,
+43204..43205,
+43232..43249,
+43302..43309,
+43335..43345,
+43392..43394,
+43443,
+43446..43449,
+43452,
+43493,
+43561..43566,
+43569..43570,
+43573..43574,
+43587,
+43596,
+43644,
+43696,
+43698..43700,
+43703..43704,
+43710..43711,
+43713,
+43756..43757,
+43766,
+44005,
+44008,
+44013,
+64286,
+65024..65039,
+65056..65071,
+65438..65439,
+66045,
+66272,
+66422..66426,
+68097..68099,
+68101..68102,
+68108..68111,
+68152..68154,
+68159,
+68325..68326,
+69633,
+69688..69702,
+69759..69761,
+69811..69814,
+69817..69818,
+69888..69890,
+69927..69931,
+69933..69940,
+70003,
+70016..70017,
+70070..70078,
+70090..70092,
+70191..70193,
+70196,
+70198..70199,
+70206,
+70367,
+70371..70378,
+70400..70401,
+70460,
+70462,
+70464,
+70487,
+70502..70508,
+70512..70516,
+70712..70719,
+70722..70724,
+70726,
+70832,
+70835..70840,
+70842,
+70845,
+70847..70848,
+70850..70851,
+71087,
+71090..71093,
+71100..71101,
+71103..71104,
+71132..71133,
+71219..71226,
+71229,
+71231..71232,
+71339,
+71341,
+71344..71349,
+71351,
+71453..71455,
+71458..71461,
+71463..71467,
+72752..72758,
+72760..72765,
+72767,
+72850..72871,
+72874..72880,
+72882..72883,
+72885..72886,
+92912..92916,
+92976..92982,
+94095..94098,
+113821..113822,
+119141,
+119143..119145,
+119150..119170,
+119173..119179,
+119210..119213,
+119362..119364,
+121344..121398,
+121403..121452,
+121461,
+121476,
+121499..121503,
+121505..121519,
+122880..122886,
+122888..122904,
+122907..122913,
+122915..122916,
+122918..122922,
+125136..125142,
+125252..125258,
+917536..917999:result:=gbpExtend;
+2307,
+2363,
+2366..2368,
+2377..2380,
+2382..2383,
+2434..2435,
+2495..2496,
+2503..2504,
+2507..2508,
+2563,
+2622..2624,
+2691,
+2750..2752,
+2761,
+2763..2764,
+2818..2819,
+2880,
+2887..2888,
+2891..2892,
+3007,
+3009..3010,
+3014..3016,
+3018..3020,
+3073..3075,
+3137..3140,
+3202..3203,
+3262,
+3264..3265,
+3267..3268,
+3271..3272,
+3274..3275,
+3330..3331,
+3391..3392,
+3398..3400,
+3402..3404,
+3458..3459,
+3536..3537,
+3544..3550,
+3570..3571,
+3635,
+3763,
+3902..3903,
+3967,
+4145,
+4155..4156,
+4182..4183,
+4228,
+6070,
+6078..6085,
+6087..6088,
+6435..6438,
+6441..6443,
+6448..6449,
+6451..6456,
+6681..6682,
+6741,
+6743,
+6765..6770,
+6916,
+6965,
+6971,
+6973..6977,
+6979..6980,
+7042,
+7073,
+7078..7079,
+7082,
+7143,
+7146..7148,
+7150,
+7154..7155,
+7204..7211,
+7220..7221,
+7393,
+7410..7411,
+43043..43044,
+43047,
+43136..43137,
+43188..43203,
+43346..43347,
+43395,
+43444..43445,
+43450..43451,
+43453..43456,
+43567..43568,
+43571..43572,
+43597,
+43755,
+43758..43759,
+43765,
+44003..44004,
+44006..44007,
+44009..44010,
+44012,
+69632,
+69634,
+69762,
+69808..69810,
+69815..69816,
+69932,
+70018,
+70067..70069,
+70079..70080,
+70188..70190,
+70194..70195,
+70197,
+70368..70370,
+70402..70403,
+70463,
+70465..70468,
+70471..70472,
+70475..70477,
+70498..70499,
+70709..70711,
+70720..70721,
+70725,
+70833..70834,
+70841,
+70843..70844,
+70846,
+70849,
+71088..71089,
+71096..71099,
+71102,
+71216..71218,
+71227..71228,
+71230,
+71340,
+71342..71343,
+71350,
+71456..71457,
+71462,
+72751,
+72766,
+72873,
+72881,
+72884,
+94033..94078,
+119142,
+119149:result:=gbpSpacingMark;
+else result:=gbpOther end

+ 12 - 1
rtl/embedded/arm/cortexm4f_start.inc

@@ -43,6 +43,14 @@ asm
   str r1, [r0]
   str r1, [r0]
 {$endif REMAP_VECTTAB}
 {$endif REMAP_VECTTAB}
 
 
+{$if defined(FPUARM_HAS_VFP_EXTENSION)}
+  ldr r0, .Lcpacr
+  ldr r1, [r0]
+  orr r1, r1, #0xf00000
+  str r1, [r0]
+  dsb
+  isb
+{$endif defined(FPUARM_HAS_VFP_EXTENSION)}
   bl PASCALMAIN
   bl PASCALMAIN
   b HaltProc
   b HaltProc
 
 
@@ -56,10 +64,13 @@ asm
   .long _data
   .long _data
 .L_edata:
 .L_edata:
   .long _edata
   .long _edata
+{$if defined(FPUARM_HAS_VFP_EXTENSION)}
+.Lcpacr:
+  .long 0xE000ED88
+{$endif defined(FPUARM_HAS_VFP_EXTENSION)}
 {$ifdef REMAP_VECTTAB}
 {$ifdef REMAP_VECTTAB}
 .Lvtor:
 .Lvtor:
   .long 0xE000ED08
   .long 0xE000ED08
 .Ltext_start:
 .Ltext_start:
   .long _text_start
   .long _text_start
 {$endif REMAP_VECTTAB}
 {$endif REMAP_VECTTAB}
-end;

+ 13 - 1
rtl/freertos/arm/cortexm4f_start.inc

@@ -43,6 +43,14 @@ asm
   str r1, [r0]
   str r1, [r0]
 {$endif REMAP_VECTTAB}
 {$endif REMAP_VECTTAB}
 
 
+{$if defined(FPUARM_HAS_VFP_EXTENSION)}
+  ldr r0, .Lcpacr
+  ldr r1, [r0]
+  orr r1, r1, #0xf00000
+  str r1, [r0]
+  dsb
+  isb
+{$endif defined(FPUARM_HAS_VFP_EXTENSION)}
   bl PASCALMAIN
   bl PASCALMAIN
   b HaltProc
   b HaltProc
 
 
@@ -56,10 +64,14 @@ asm
   .long _data
   .long _data
 .L_edata:
 .L_edata:
   .long _edata
   .long _edata
+{ literal loaded by the 'ldr r0, .Lcpacr' FPU enable sequence before PASCALMAIN;
+  it must be emitted under the same condition as that sequence }
+{$if defined(FPUARM_HAS_VFP_EXTENSION)}
+.Lcpacr:
+  .long 0xE000ED88
+{$endif defined(FPUARM_HAS_VFP_EXTENSION)}
 {$ifdef REMAP_VECTTAB}
 {$ifdef REMAP_VECTTAB}
 .Lvtor:
 .Lvtor:
   .long 0xE000ED08
   .long 0xE000ED08
 .Ltext_start:
 .Ltext_start:
   .long _text_start
   .long _text_start
 {$endif REMAP_VECTTAB}
 {$endif REMAP_VECTTAB}
-end;
+end;

+ 1 - 1
utils/unicode/cldrparser.lpr

@@ -54,7 +54,7 @@ const
     '         <HaltOnFail> may be one of (y, Y, t, T, 1) to halt the execution on the first failing.' + sLineBreak +
     '         <HaltOnFail> may be one of (y, Y, t, T, 1) to halt the execution on the first failing.' + sLineBreak +
     ' ' + sLineBreak +
     ' ' + sLineBreak +
     '  The program expects some files to be present in the <dataDir> folder : ' + sLineBreak +
     '  The program expects some files to be present in the <dataDir> folder : ' + sLineBreak +
-    '     - UCA_Rules_SHORT.xml ' + sLineBreak +
+    '     - UCA_Rules_SHORT.txt ' + sLineBreak +
     '     - allkeys.txt this is the file allkeys_CLDR.txt renamed to allkeys.txt' + sLineBreak +
     '     - allkeys.txt this is the file allkeys_CLDR.txt renamed to allkeys.txt' + sLineBreak +
     '  These files are in the core.zip file of the CLDR release files. The CLDR''version used should be synchronized the' + sLineBreak +
     '  These files are in the core.zip file of the CLDR release files. The CLDR''version used should be synchronized the' + sLineBreak +
     '  version of the Unicode version used, for example for Uniocde 7 it will be CLDR 26.' + sLineBreak +
     '  version of the Unicode version used, for example for Uniocde 7 it will be CLDR 26.' + sLineBreak +

+ 7 - 7
utils/unicode/data/readme.txt

@@ -1,13 +1,13 @@
 This folder requires the next files to be present:
 This folder requires the next files to be present:
 
 
-  Extracted from http://www.unicode.org/Public/6.2.0/ucd/UCD.zip:
-    * UnicodeData.txt 
+  Extracted from https://www.unicode.org/Public/zipped/9.0.0/UCD.zip:
+    * UnicodeData.txt
     * HangulSyllableType.txt
     * HangulSyllableType.txt
     * PropList.txt
     * PropList.txt
 
 
-  Extracted from http://www.unicode.org/Public/UCA/6.2.0/CollationAuxiliary.zip:
-    * allkeys.txt : this file is actually the allkeys_CLDR.txt file renamed. It is the CLDR's root collation.
-    * UCA_Rules_SHORT.xml
+#???  Extracted from http://www.unicode.org/Public/UCA/6.2.0/CollationAuxiliary.zip:
 
 
-  Extracted from http://www.unicode.org/Public/cldr/22/core.zip (see the "common\collation" folder):
-    * all the language specific xml files (de.xml, es.xml, ...)
+  Extracted from https://www.unicode.org/Public/cldr/30/core.zip:
+    * allkeys.txt : this file is actually the allkeys_CLDR.txt file renamed. It is the CLDR's root collation.
+    * UCA_Rules_SHORT.txt
+    * all the language specific xml files (de.xml, es.xml, ...) (see the "common\collation" folder)

+ 1 - 0
utils/unicode/fpmake.pp

@@ -60,6 +60,7 @@ begin
 
 
     T:=P.Targets.AddProgram('cldrparser.lpr');
     T:=P.Targets.AddProgram('cldrparser.lpr');
     T:=P.Targets.AddProgram('unihelper.lpr');
     T:=P.Targets.AddProgram('unihelper.lpr');
+    T:=P.Targets.AddProgram('gbpparser.lpr');
 
 
     end;
     end;
 end;
 end;

+ 58 - 0
utils/unicode/gbpparser.lpi

@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<CONFIG>
+  <ProjectOptions>
+    <Version Value="11"/>
+    <General>
+      <Flags>
+        <MainUnitHasCreateFormStatements Value="False"/>
+        <MainUnitHasTitleStatement Value="False"/>
+        <MainUnitHasScaledStatement Value="False"/>
+      </Flags>
+      <SessionStorage Value="InProjectDir"/>
+      <MainUnit Value="0"/>
+      <Title Value="gbpparser"/>
+      <UseAppBundle Value="False"/>
+      <ResourceType Value="res"/>
+    </General>
+    <BuildModes Count="1">
+      <Item1 Name="Default" Default="True"/>
+    </BuildModes>
+    <PublishOptions>
+      <Version Value="2"/>
+      <UseFileFilters Value="True"/>
+    </PublishOptions>
+    <RunParams>
+      <FormatVersion Value="2"/>
+      <Modes Count="0"/>
+    </RunParams>
+    <Units Count="1">
+      <Unit0>
+        <Filename Value="gbpparser.lpr"/>
+        <IsPartOfProject Value="True"/>
+      </Unit0>
+    </Units>
+  </ProjectOptions>
+  <CompilerOptions>
+    <Version Value="11"/>
+    <Target>
+      <Filename Value="gbpparser"/>
+    </Target>
+    <SearchPaths>
+      <IncludeFiles Value="$(ProjOutDir)"/>
+      <UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
+    </SearchPaths>
+  </CompilerOptions>
+  <Debugging>
+    <Exceptions Count="3">
+      <Item1>
+        <Name Value="EAbort"/>
+      </Item1>
+      <Item2>
+        <Name Value="ECodetoolError"/>
+      </Item2>
+      <Item3>
+        <Name Value="EFOpenError"/>
+      </Item3>
+    </Exceptions>
+  </Debugging>
+</CONFIG>

+ 379 - 0
utils/unicode/gbpparser.lpr

@@ -0,0 +1,379 @@
+{ Parser and code generator for the GraphemeBreakProperty.
+
+  Copyright (C) 2021 Nikolay Nikolov <[email protected]>
+
+  This source is free software; you can redistribute it and/or modify it under
+  the terms of the GNU General Public License as published by the Free
+  Software Foundation; either version 2 of the License, or (at your option)
+  any later version.
+
+  This code is distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+  details.
+
+  A copy of the GNU General Public License is available on the World Wide Web
+  at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
+  to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+  Boston, MA 02110-1335, USA.
+}
+
+
+program gbpparser;
+
+{$mode objfpc}{$H+}
+
+uses
+  SysUtils, StrUtils;
+
+type
+  { Grapheme break property values; gbpOther is the default for code points
+    not listed in the input file. The generated code refers to these
+    identifiers by name (including the 'gpb' spelling of
+    gpbRegional_Indicator), so they have to match the enumeration declared in
+    the graphemebreakproperty unit. }
+  TGraphemeBreakProperty = (
+    gbpOther,
+    gbpPrepend,
+    gbpCR,
+    gbpLF,
+    gbpControl,
+    gbpExtend,
+    gpbRegional_Indicator,
+    gbpSpacingMark,
+    gbpL,
+    gbpV,
+    gbpT,
+    gbpLV,
+    gbpLVT,
+    gbpE_Base,
+    gbpE_Modifier,
+    gbpZWJ,
+    gbpGlue_After_Zwj,
+    gbpE_Base_GAZ);
+
+  { An inclusive range of code points. }
+  TRange = record
+    RangeLo, RangeHi: UCS4Char;
+  end;
+  TRanges = array of TRange;
+
+var
+  { Property of every code point, filled by ParseGraphemeBreakProperties. }
+  GraphemeBreakProperties: array [UCS4Char] of TGraphemeBreakProperty;
+  { Per-property summary, filled by CalcStatsAndRanges and consumed (and
+    partly modified) by GenCode. }
+  GBPStats: array [TGraphemeBreakProperty] of record
+    Exists: Boolean;     { at least one code point has this property }
+    Handled: Boolean;    { code for this property was already emitted, or it is unused }
+    MinValue: UCS4Char;  { lowest code point with this property }
+    MaxValue: UCS4Char;  { highest code point with this property }
+    Count: LongInt;      { number of code points with this property }
+    Ranges: TRanges;     { runs of consecutive code points, in ascending order }
+  end;
+
+{ Maps a property name, as found in the second field of an input line, to
+  the corresponding TGraphemeBreakProperty value.
+  Leading and trailing whitespace of S is ignored; the match is
+  case-sensitive.
+  Raises EArgumentException when S is not one of the known names. }
+function ParseGraphemeBreakProperty(S: string): TGraphemeBreakProperty;
+begin
+  S := Trim(S);
+  case S of
+    'Prepend':
+      Result := gbpPrepend;
+    'CR':
+      Result := gbpCR;
+    'LF':
+      Result := gbpLF;
+    'Control':
+      Result := gbpControl;
+    'Extend':
+      Result := gbpExtend;
+    'Regional_Indicator':
+      Result := gpbRegional_Indicator;
+    'SpacingMark':
+      Result := gbpSpacingMark;
+    'L':
+      Result := gbpL;
+    'V':
+      Result := gbpV;
+    'T':
+      Result := gbpT;
+    'LV':
+      Result := gbpLV;
+    'LVT':
+      Result := gbpLVT;
+    'E_Base':
+      Result := gbpE_Base;
+    'E_Modifier':
+      Result := gbpE_Modifier;
+    'ZWJ':
+      Result := gbpZWJ;
+    'Glue_After_Zwj':
+      Result := gbpGlue_After_Zwj;
+    'E_Base_GAZ':
+      Result := gbpE_Base_GAZ;
+    else
+      { the exception object has to be constructed with Create; writing
+        EArgumentException(...) alone is a typecast of the string }
+      raise EArgumentException.Create('Unknown grapheme break property: ''' + S + '''');
+  end;
+end;
+
+{ Parses a code point field of the form 'XXXX' or 'XXXX..YYYY', where both
+  parts are hexadecimal numbers; surrounding whitespace is ignored.
+  RangeLo receives the first number and RangeHi the second one, or the same
+  value as RangeLo when S holds a single code point.
+  StrToInt raises EConvertError when a part is not a valid hexadecimal
+  number. }
+procedure ParseRange(S: string; out RangeLo, RangeHi: UCS4Char);
+var
+  dp: SizeInt;
+begin
+  S := Trim(S);
+  dp := Pos('..', S);
+  if dp > 0 then
+  begin
+    RangeLo := StrToInt('$' + LeftStr(S, dp - 1));
+    { the upper bound starts right behind the two dots, at index dp + 2,
+      which leaves exactly Length(S) - dp - 1 characters to copy }
+    RangeHi := StrToInt('$' + Copy(S, dp + 2, Length(S) - dp - 1));
+  end
+  else
+  begin
+    RangeLo := StrToInt('$' + S);
+    RangeHi := RangeLo;
+  end;
+end;
+
+{ Reads the property file FileName and stores the property of every listed
+  code point in GraphemeBreakProperties.
+  Each line is stripped of whitespace and of everything from the first '#'
+  on; remaining non-empty lines must have the form '<range> ; <property>'.
+  Halts the program with exit code 1 when the file does not exist and raises
+  an exception on a malformed line. }
+procedure ParseGraphemeBreakProperties(const FileName: string);
+var
+  InF: TextFile;
+  S: string;
+  SplitS: TStringArray;
+  LineNr: Integer = 0;
+  gbp: TGraphemeBreakProperty;
+  RangeLo, RangeHi, R: UCS4Char;
+begin
+  if not FileExists(FileName) then
+  begin
+    Writeln('File doesn''t exist: ', FileName);
+    Halt(1);
+  end;
+  AssignFile(InF, FileName);
+  Reset(InF);
+  { make sure the file gets closed even when a line fails to parse }
+  try
+    while not EoF(InF) do
+    begin
+      Inc(LineNr);
+      Readln(InF, S);
+      S := Trim(S);
+      { drop the trailing comment, if any }
+      if Pos('#', S) > 0 then
+        S := LeftStr(S, Pos('#', S) - 1);
+      if S <> '' then
+      begin
+        SplitS := S.Split([';']);
+        if Length(SplitS) <> 2 then
+          raise Exception.Create('Invalid number of ; separators on line ' + IntToStr(LineNr));
+        ParseRange(SplitS[0], RangeLo, RangeHi);
+        gbp := ParseGraphemeBreakProperty(SplitS[1]);
+        for R := RangeLo to RangeHi do
+          GraphemeBreakProperties[R] := gbp;
+      end;
+    end;
+  finally
+    CloseFile(InF);
+  end;
+end;
+
+{ Rebuilds GBPStats from GraphemeBreakProperties: for every property the
+  lowest and highest code point, the number of code points and the list of
+  runs of consecutive code points carrying it. }
+procedure CalcStatsAndRanges;
+var
+  Ch: UCS4Char;
+  gbp, prev_gbp: TGraphemeBreakProperty;
+  StartsRun: Boolean;
+begin
+  FillChar(GBPStats, SizeOf(GBPStats), 0);
+  gbp := Low(TGraphemeBreakProperty);
+  for Ch := Low(UCS4Char) to High(UCS4Char) do
+  begin
+    prev_gbp := gbp;
+    gbp := GraphemeBreakProperties[Ch];
+
+    if GBPStats[gbp].Exists then
+      { a known property opens a new run only when the previous code point
+        had a different property }
+      StartsRun := prev_gbp <> gbp
+    else
+    begin
+      { first code point seen for this property }
+      GBPStats[gbp].Exists := True;
+      GBPStats[gbp].MinValue := Ch;
+      StartsRun := True;
+    end;
+
+    GBPStats[gbp].MaxValue := Ch;
+    Inc(GBPStats[gbp].Count);
+
+    if StartsRun then
+    begin
+      SetLength(GBPStats[gbp].Ranges, Length(GBPStats[gbp].Ranges) + 1);
+      GBPStats[gbp].Ranges[High(GBPStats[gbp].Ranges)].RangeLo := Ch;
+    end;
+    { the current run, new or continued, now ends at Ch }
+    GBPStats[gbp].Ranges[High(GBPStats[gbp].Ranges)].RangeHi := Ch;
+  end;
+end;
+
+{ Called after the code points RLo..RHi have been dealt with: when a not yet
+  handled property has two neighbouring ranges that are separated exactly by
+  RLo..RHi, they are merged into a single range. At most one merge is done. }
+procedure MaybeCoalesceRanges(RLo, RHi: UCS4Char);
+var
+  gbp: TGraphemeBreakProperty;
+  RI: Integer;
+begin
+  for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+  begin
+    if (not GBPStats[gbp].Exists) or GBPStats[gbp].Handled then
+      continue;
+    with GBPStats[gbp] do
+      for RI := 0 to High(Ranges) - 1 do
+      begin
+        { the gap between range RI and range RI + 1 must be exactly RLo..RHi }
+        if Ranges[RI].RangeHi <> (RLo - 1) then
+          continue;
+        if Ranges[RI + 1].RangeLo <> (RHi + 1) then
+          continue;
+        Ranges[RI].RangeHi := Ranges[RI + 1].RangeHi;
+        Delete(Ranges, RI + 1, 1);
+        exit;
+      end;
+  end;
+end;
+
+{ Returns the smallest number of ranges found among the properties that
+  exist and are not handled yet (gbpOther is not considered), or
+  High(Integer) when there is no such property. }
+function FindMinRangeCount: Integer;
+var
+  gbp: TGraphemeBreakProperty;
+  N: Integer;
+begin
+  Result := High(Integer);
+  for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+  begin
+    if (not GBPStats[gbp].Exists) or GBPStats[gbp].Handled then
+      continue;
+    N := Length(GBPStats[gbp].Ranges);
+    if N < Result then
+      Result := N;
+  end;
+end;
+
+{ Checks whether the gbpLV and gbpLVT code points follow the regular pattern
+  that allows them to be told apart arithmetically: inside 44032..55203
+  every 28th code point, starting with the first one, is gbpLV and all the
+  others are gbpLVT, and neither property occurs outside that block.
+  Returns True when the pattern holds. }
+function ApplyLV_LVTCompression: Boolean;
+const
+  BlockLo = 44032;
+  BlockHi = 55203;
+var
+  Ch: UCS4Char;
+  Expected: TGraphemeBreakProperty;
+begin
+  Result := False;
+  { both properties have to start and end exactly where the pattern says }
+  if not ((GBPStats[gbpLV].MinValue = BlockLo) and (GBPStats[gbpLV].MaxValue = (BlockHi - 27)) and
+          (GBPStats[gbpLVT].MinValue = (BlockLo + 1)) and (GBPStats[gbpLVT].MaxValue = BlockHi)) then
+    exit;
+  for Ch := BlockLo to BlockHi do
+  begin
+    if ((Ch - BlockLo) mod 28) = 0 then
+      Expected := gbpLV
+    else
+      Expected := gbpLVT;
+    if GraphemeBreakProperties[Ch] <> Expected then
+      exit;
+  end;
+  Result := True;
+end;
+
+{ Writes the Pascal statement that classifies the code point 'Ch' into
+  'result' to the file OutFileName, based on the data in GBPStats.
+  The statement is a chain of 'if ... then result:=... else' tests, ordered
+  from the properties with the fewest ranges to those with more, followed by
+  a 'case Ch of' statement for the properties that have more than
+  RangeCountThreshold ranges. gbpOther is never tested for; it is the final
+  default.
+  Since an earlier test in the chain takes precedence, code points already
+  tested for may be included in the ranges of later tests; that is why
+  MaybeCoalesceRanges is called after a property has been emitted.
+  GBPStats is modified (Handled flags, merged ranges) by this procedure. }
+procedure GenCode(const OutFileName: string);
+const
+  { properties with at most this many ranges are emitted as 'if' tests }
+  RangeCountThreshold = 30{400};
+var
+  gbp: TGraphemeBreakProperty;
+  RI, NextRangeCount: Integer;
+  OutFile: TextFile;
+begin
+  Writeln('Generating file: ', OutFileName);
+
+  AssignFile(OutFile, OutFileName);
+  Rewrite(OutFile);
+
+  Writeln(OutFile, '{ do not edit, this file is autogenerated by the gbpparser tool }');
+
+  { unused properties are already handled }
+  for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+    if not GBPStats[gbp].Exists then
+      GBPStats[gbp].Handled := True;
+
+  { handle single codepoints first }
+  for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+    if (not GBPStats[gbp].Handled) and (GBPStats[gbp].Count = 1) then
+    begin
+      if GBPStats[gbp].MinValue <> GBPStats[gbp].MaxValue then
+        raise Exception.Create('Internal error');
+      Writeln(OutFile, 'if Ch=', GBPStats[gbp].MinValue, 'then result:=',gbp,' else');
+      GBPStats[gbp].Handled := True;
+      MaybeCoalesceRanges(GBPStats[gbp].MinValue, GBPStats[gbp].MaxValue);
+    end;
+
+  { handle single range codepoints next }
+  { repeated, because coalescing may turn another property into a single range }
+  while FindMinRangeCount = 1 do
+    for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+      if (not GBPStats[gbp].Handled) and (Length(GBPStats[gbp].Ranges) = 1) then
+      begin
+        Writeln(OutFile, 'if(Ch>=', GBPStats[gbp].MinValue, ')and(Ch<=', GBPStats[gbp].MaxValue, ')then result:=',gbp,' else');
+        GBPStats[gbp].Handled := True;
+        MaybeCoalesceRanges(GBPStats[gbp].MinValue, GBPStats[gbp].MaxValue);
+      end;
+
+  { gbpLV and gbpLVT are told apart with a 'mod 28' test when they follow the
+    regular pattern verified by ApplyLV_LVTCompression }
+  if ApplyLV_LVTCompression then
+  begin
+    Writeln(OutFile, 'if(Ch>=44032)and(Ch<=55203)then begin if((Ch-44032)mod 28)=0then result:=gbpLV else result:=gbpLVT end else');
+    GBPStats[gbpLV].Handled := True;
+    GBPStats[gbpLVT].Handled := True;
+  end;
+
+  { emit the remaining properties as 'if' tests with or-ed conditions, those
+    with the fewest ranges first, until only properties with more than
+    RangeCountThreshold ranges (or none at all) are left }
+  repeat
+    NextRangeCount := FindMinRangeCount;
+    if NextRangeCount <= RangeCountThreshold then
+      for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+      begin
+        if not GBPStats[gbp].Handled and (Length(GBPStats[gbp].Ranges) <= NextRangeCount) then
+        begin
+          { marked as handled up front, so that MaybeCoalesceRanges below
+            leaves the range list being iterated alone }
+          GBPStats[gbp].Handled := True;
+          Write(OutFile, 'if');
+          for RI := 0 to High(GBPStats[gbp].Ranges) do
+          begin
+            if RI <> 0 then
+              Writeln(OutFile, 'or');
+            with GBPStats[gbp].Ranges[RI] do
+            begin
+              if RangeLo = RangeHi then
+                Write(OutFile, '(Ch=', RangeLo, ')')
+              else
+                Write(OutFile, '((Ch>=', RangeLo, ')and(Ch<=', RangeHi, '))');
+              MaybeCoalesceRanges(RangeLo, RangeHi);
+            end;
+          end;
+          Writeln(OutFile, 'then result:=',gbp,' else');
+        end;
+      end;
+  until NextRangeCount > RangeCountThreshold;
+
+  { NextRangeCount = High(Integer) means that no unhandled property is left }
+  if NextRangeCount <> High(Integer) then
+  begin
+    //for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+    //  if not GBPStats[gbp].Handled then
+    //    Writeln(gbp, ' ', GBPStats[gbp].MinValue, '..', GBPStats[gbp].MaxValue, ' ', GBPStats[gbp].Count, ' ', Length(GBPStats[gbp].Ranges), ' ', (GBPStats[gbp].MaxValue - GBPStats[gbp].MinValue + 7) div 8);
+    { everything that is left goes into one case statement, with one branch
+      per property }
+    Writeln(OutFile, 'case Ch of');
+    for gbp := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
+    begin
+      if not GBPStats[gbp].Handled then
+      begin
+        GBPStats[gbp].Handled := True;
+        for RI := 0 to High(GBPStats[gbp].Ranges) do
+        begin
+          if RI <> 0 then
+            Writeln(OutFile, ',');
+          with GBPStats[gbp].Ranges[RI] do
+          begin
+            if RangeLo = RangeHi then
+              Write(OutFile, RangeLo)
+            else
+              Write(OutFile, RangeLo, '..', RangeHi);
+          end;
+        end;
+        Writeln(OutFile, ':result:=', gbp, ';');
+      end;
+    end;
+    Writeln(OutFile, 'else result:=gbpOther end');
+  end
+  else
+    { terminates the trailing 'else' of the if chain }
+    Writeln(OutFile, 'result:=gbpOther');
+
+  CloseFile(OutFile);
+end;
+
+const
+  { property file to read, relative to the current directory }
+  InputFileName = 'data/UCD/auxiliary/GraphemeBreakProperty.txt';
+  { include file to generate, written to the current directory }
+  OutputFileName = 'graphemebreakproperty_code.inc';
+
+{ Main program: every code point starts out as gbpOther (ordinal 0), then
+  the property file is read, summarized and turned into code. }
+begin
+  FillChar(GraphemeBreakProperties, SizeOf(GraphemeBreakProperties), 0);
+  ParseGraphemeBreakProperties(InputFileName);
+  CalcStatsAndRanges;
+  GenCode(OutputFileName);
+  Writeln('Done');
+end.
+

+ 8 - 8
utils/unicode/parse-collations.bat

@@ -1,17 +1,17 @@
-cldrparser.exe de.xml -d.\data -o.\data
+cldrparser.exe de -d.\data -o.\data
 echo
 echo
-cldrparser es.xml -d.\data -o.\data
+cldrparser es -d.\data -o.\data
 echo
 echo
-cldrparser fr_CA.xml -d.\data -o.\data
+cldrparser fr_CA -d.\data -o.\data
 echo
 echo
-cldrparser ja.xml -d.\data -o.\data
+cldrparser ja -d.\data -o.\data
 echo
 echo
-cldrparser ko.xml -d.\data -o.\data
+cldrparser ko -d.\data -o.\data
 echo
 echo
-cldrparser ru.xml -d.\data -o.\data
+cldrparser ru -d.\data -o.\data
 echo
 echo
-cldrparser sv.xml -d.\data -o.\data
+cldrparser sv -d.\data -o.\data
 echo
 echo
-cldrparser zh.xml -d.\data -o.\data
+cldrparser zh -d.\data -o.\data
 
 
 pause
 pause

+ 8 - 8
utils/unicode/parse-collations.sh

@@ -1,18 +1,18 @@
 #!/bin/bash
 #!/bin/bash
-./cldrparser de.xml -d./data -o./data
+./cldrparser de -d./data -o./data
 echo
 echo
-./cldrparser es.xml -d./data -o./data
+./cldrparser es -d./data -o./data
 echo
 echo
-./cldrparser fr_CA.xml -d./data -o./data
+./cldrparser fr_CA -d./data -o./data
 echo
 echo
-./cldrparser ja.xml -d./data -o./data
+./cldrparser ja -d./data -o./data
 echo
 echo
-./cldrparser ko.xml -d./data -o./data
+./cldrparser ko -d./data -o./data
 echo
 echo
-./cldrparser ru.xml -d./data -o./data
+./cldrparser ru -d./data -o./data
 echo
 echo
-./cldrparser sv.xml -d./data -o./data
+./cldrparser sv -d./data -o./data
 echo
 echo
-./cldrparser zh.xml -d./data -o./data
+./cldrparser zh -d./data -o./data
 
 
 read -p "Press [Enter] key to continue ..."
 read -p "Press [Enter] key to continue ..."