unicodedata.pas 57 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. unit unicodedata;
  17. {$mode delphi}
  18. {$H+}
  19. {$PACKENUM 1}
  20. {$SCOPEDENUMS ON}
  21. {$pointermath on}
  22. {$define USE_INLINE}
  23. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  24. { $define uni_debug}
  25. interface
  26. const
  27. MAX_WORD = High(Word);
  28. LOW_SURROGATE_BEGIN = Word($DC00);
  29. LOW_SURROGATE_END = Word($DFFF);
  30. HIGH_SURROGATE_BEGIN = Word($D800);
  31. HIGH_SURROGATE_END = Word($DBFF);
  32. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  33. UCS4_HALF_BASE = LongWord($10000);
  34. UCS4_HALF_MASK = Word($3FF);
  35. MAX_LEGAL_UTF32 = $10FFFF;
  36. const
  37. // Unicode General Category
  38. UGC_UppercaseLetter = 0;
  39. UGC_LowercaseLetter = 1;
  40. UGC_TitlecaseLetter = 2;
  41. UGC_ModifierLetter = 3;
  42. UGC_OtherLetter = 4;
  43. UGC_NonSpacingMark = 5;
  44. UGC_CombiningMark = 6;
  45. UGC_EnclosingMark = 7;
  46. UGC_DecimalNumber = 8;
  47. UGC_LetterNumber = 9;
  48. UGC_OtherNumber = 10;
  49. UGC_ConnectPunctuation = 11;
  50. UGC_DashPunctuation = 12;
  51. UGC_OpenPunctuation = 13;
  52. UGC_ClosePunctuation = 14;
  53. UGC_InitialPunctuation = 15;
  54. UGC_FinalPunctuation = 16;
  55. UGC_OtherPunctuation = 17;
  56. UGC_MathSymbol = 18;
  57. UGC_CurrencySymbol = 19;
  58. UGC_ModifierSymbol = 20;
  59. UGC_OtherSymbol = 21;
  60. UGC_SpaceSeparator = 22;
  61. UGC_LineSeparator = 23;
  62. UGC_ParagraphSeparator = 24;
  63. UGC_Control = 25;
  64. UGC_Format = 26;
  65. UGC_Surrogate = 27;
  66. UGC_PrivateUse = 28;
  67. UGC_Unassigned = 29;
  68. type
  69. TUInt24Rec = packed record
  70. public
  71. {$ifdef FPC_LITTLE_ENDIAN}
  72. byte0, byte1, byte2 : Byte;
  73. {$else FPC_LITTLE_ENDIAN}
  74. byte2, byte1, byte0 : Byte;
  75. {$endif FPC_LITTLE_ENDIAN}
  76. public
  77. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  78. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  79. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  80. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  81. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  82. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  83. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  84. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  85. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  86. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  87. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  88. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  89. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  90. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  91. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  92. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  93. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  94. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  95. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  96. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  97. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  98. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  99. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  100. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  101. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  102. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  103. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  104. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  105. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  106. end;
  107. UInt24 = TUInt24Rec;
  108. PUInt24 = ^UInt24;
  109. const
  110. ZERO_UINT24 : UInt24 =
  111. {$ifdef FPC_LITTLE_ENDIAN}
  112. (byte0 : 0; byte1 : 0; byte2 : 0;);
  113. {$else FPC_LITTLE_ENDIAN}
  114. (byte2 : 0; byte1 : 0; byte0 : 0;);
  115. {$endif FPC_LITTLE_ENDIAN}
  116. type
  117. PUC_Prop = ^TUC_Prop;
  118. { TUC_Prop }
  119. TUC_Prop = packed record
  120. private
  121. function GetCategory : Byte;inline;
  122. procedure SetCategory(AValue : Byte);
  123. function GetWhiteSpace : Boolean;inline;
  124. procedure SetWhiteSpace(AValue : Boolean);
  125. function GetHangulSyllable : Boolean;inline;
  126. procedure SetHangulSyllable(AValue : Boolean);
  127. function GetNumericValue: Double;inline;
  128. public
  129. CategoryData : Byte;
  130. public
  131. CCC : Byte;
  132. NumericIndex : Byte;
  133. SimpleUpperCase : UInt24;
  134. SimpleLowerCase : UInt24;
  135. DecompositionID : SmallInt;
  136. public
  137. property Category : Byte read GetCategory write SetCategory;
  138. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  139. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  140. property NumericValue : Double read GetNumericValue;
  141. end;
  142. type
  143. TUCA_PropWeights = packed record
  144. Weights : array[0..2] of Word;
  145. end;
  146. PUCA_PropWeights = ^TUCA_PropWeights;
  147. TUCA_PropItemContextRec = packed record
  148. public
  149. CodePointCount : Byte;
  150. WeightCount : Byte;
  151. public
  152. function GetCodePoints() : PUInt24;inline;
  153. function GetWeights() : PUCA_PropWeights;inline;
  154. end;
  155. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  156. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  157. TUCA_PropItemContextTreeNodeRec = packed record
  158. public
  159. Left : Word;
  160. Right : Word;
  161. Data : TUCA_PropItemContextRec;
  162. public
  163. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  164. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  165. end;
  166. { TUCA_PropItemContextTreeRec }
  167. TUCA_PropItemContextTreeRec = packed record
  168. public
  169. Size : UInt24;
  170. public
  171. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  172. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  173. function Find(
  174. const AChars : PUInt24;
  175. const ACharCount : Integer;
  176. out ANode : PUCA_PropItemContextTreeNodeRec
  177. ) : Boolean;
  178. end;
  179. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  180. { TUCA_PropItemRec }
  181. TUCA_PropItemRec = packed record
  182. private
  183. const FLAG_VALID = 0;
  184. const FLAG_CODEPOINT = 1;
  185. const FLAG_CONTEXTUAL = 2;
  186. const FLAG_DELETION = 3;
  187. const FLAG_COMPRESS_WEIGHT_1 = 6;
  188. const FLAG_COMPRESS_WEIGHT_2 = 7;
  189. private
  190. function GetCodePoint() : UInt24;inline;
  191. public
  192. WeightLength : Byte;
  193. ChildCount : Byte;
  194. Size : Word;
  195. Flags : Byte;
  196. public
  197. function HasCodePoint() : Boolean;inline;
  198. property CodePoint : UInt24 read GetCodePoint;
  199. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  200. function IsValid() : Boolean;inline;
  201. //function GetWeightArray() : PUCA_PropWeights;inline;
  202. procedure GetWeightArray(ADest : PUCA_PropWeights);
  203. function GetSelfOnlySize() : Cardinal;inline;
  204. function GetContextual() : Boolean;inline;
  205. property Contextual : Boolean read GetContextual;
  206. function GetContext() : PUCA_PropItemContextTreeRec;
  207. function IsDeleted() : Boolean;inline;
  208. function IsWeightCompress_1() : Boolean;inline;
  209. function IsWeightCompress_2() : Boolean;inline;
  210. end;
  211. PUCA_PropItemRec = ^TUCA_PropItemRec;
  212. TUCA_VariableKind = (
  213. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  214. ucaIgnoreSP // This one is not implemented !
  215. );
  216. TCollationName = string[128];
  217. PUCA_DataBook = ^TUCA_DataBook;
  218. TUCA_DataBook = packed record
  219. public
  220. Base : PUCA_DataBook;
  221. Version : TCollationName;
  222. CollationName : TCollationName;
  223. VariableWeight : TUCA_VariableKind;
  224. Backwards : array[0..3] of Boolean;
  225. BMP_Table1 : PByte;
  226. BMP_Table2 : PUInt24;
  227. OBMP_Table1 : PWord;
  228. OBMP_Table2 : PUInt24;
  229. PropCount : Integer;
  230. Props : PUCA_PropItemRec;
  231. VariableLowLimit : Word;
  232. VariableHighLimit : Word;
  233. public
  234. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  235. end;
  236. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  237. TCollationFields = set of TCollationField;
  238. const
  239. ROOT_COLLATION_NAME = 'DUCET';
  240. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  241. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  242. function UnicodeIsSurrogatePair(
  243. const AHighSurrogate,
  244. ALowSurrogate : UnicodeChar
  245. ) : Boolean;inline;
  246. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  247. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  248. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  249. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  250. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  251. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
  252. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
  253. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  254. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  255. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  256. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  257. type
  258. TUCASortKeyItem = Word;
  259. TUCASortKey = array of TUCASortKeyItem;
  260. function ComputeSortKey(
  261. const AString : UnicodeString;
  262. const ACollation : PUCA_DataBook
  263. ) : TUCASortKey;inline;overload;
  264. function ComputeSortKey(
  265. const AStr : PUnicodeChar;
  266. const ALength : SizeInt;
  267. const ACollation : PUCA_DataBook
  268. ) : TUCASortKey;overload;
  269. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  270. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  271. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  272. function UnregisterCollation(const AName : ansistring): Boolean;
  273. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  274. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  275. function GetCollationCount() : Integer;
  276. procedure PrepareCollation(
  277. ACollation : PUCA_DataBook;
  278. const ABaseName : ansistring;
  279. const AChangedFields : TCollationFields
  280. );
  281. resourcestring
  282. SCollationNotFound = 'Collation not found : "%s".';
  283. implementation
  284. uses
  285. unicodenumtable;
  286. type
  287. TCardinalRec = packed record
  288. {$ifdef FPC_LITTLE_ENDIAN}
  289. byte0, byte1, byte2, byte3 : Byte;
  290. {$else FPC_LITTLE_ENDIAN}
  291. byte3, byte2, byte1, byte0 : Byte;
  292. {$endif FPC_LITTLE_ENDIAN}
  293. end;
  294. TWordRec = packed record
  295. {$ifdef FPC_LITTLE_ENDIAN}
  296. byte0, byte1 : Byte;
  297. {$else FPC_LITTLE_ENDIAN}
  298. byte1, byte0 : Byte;
  299. {$endif FPC_LITTLE_ENDIAN}
  300. end;
  301. { TUInt24Rec }
  302. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  303. begin
  304. TCardinalRec(Result).byte0 := a.byte0;
  305. TCardinalRec(Result).byte1 := a.byte1;
  306. TCardinalRec(Result).byte2 := a.byte2;
  307. TCardinalRec(Result).byte3 := 0;
  308. end;
  309. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  310. begin
  311. Result := Cardinal(a);
  312. end;
  313. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  314. begin
  315. {$IFOPT R+}
  316. if (a > $FFFF) then
  317. Error(reIntOverflow);
  318. {$ENDIF R+}
  319. TWordRec(Result).byte0 := a.byte0;
  320. TWordRec(Result).byte1 := a.byte1;
  321. end;
  322. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  323. begin
  324. {$IFOPT R+}
  325. if (a > $FF) then
  326. Error(reIntOverflow);
  327. {$ENDIF R+}
  328. Result := a.byte0;
  329. end;
  330. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  331. begin
  332. {$IFOPT R+}
  333. if (a > $FFFFFF) then
  334. Error(reIntOverflow);
  335. {$ENDIF R+}
  336. Result.byte0 := TCardinalRec(a).byte0;
  337. Result.byte1 := TCardinalRec(a).byte1;
  338. Result.byte2 := TCardinalRec(a).byte2;
  339. end;
  340. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  341. begin
  342. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  343. end;
  344. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  345. begin
  346. Result := (TCardinalRec(b).byte3 = 0) and
  347. (a.byte0 = TCardinalRec(b).byte0) and
  348. (a.byte1 = TCardinalRec(b).byte1) and
  349. (a.byte2 = TCardinalRec(b).byte2);
  350. end;
  351. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  352. begin
  353. Result := (b = a);
  354. end;
  355. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  356. begin
  357. Result := (LongInt(a) = b);
  358. end;
  359. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  360. begin
  361. Result := (b = a);
  362. end;
  363. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  364. begin
  365. Result := (a.byte2 = 0) and
  366. (a.byte0 = TWordRec(b).byte0) and
  367. (a.byte1 = TWordRec(b).byte1);
  368. end;
  369. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  370. begin
  371. Result := (b = a);
  372. end;
  373. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  374. begin
  375. Result := (a.byte2 = 0) and
  376. (a.byte1 = 0) and
  377. (a.byte0 = b);
  378. end;
  379. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  380. begin
  381. Result := (b = a);
  382. end;
  383. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  384. begin
  385. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  386. end;
  387. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  388. begin
  389. Result := (TCardinalRec(b).byte3 <> 0) or
  390. (a.byte0 <> TCardinalRec(b).byte0) or
  391. (a.byte1 <> TCardinalRec(b).byte1) or
  392. (a.byte2 <> TCardinalRec(b).byte2);
  393. end;
  394. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  395. begin
  396. Result := (b <> a);
  397. end;
  398. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  399. begin
  400. Result := (a.byte2 > b.byte2) or
  401. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  402. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  403. end;
  404. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  405. begin
  406. Result := Cardinal(a) > b;
  407. end;
  408. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  409. begin
  410. Result := a > Cardinal(b);
  411. end;
  412. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  413. begin
  414. Result := (a.byte2 > b.byte2) or
  415. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  416. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  417. end;
  418. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  419. begin
  420. Result := Cardinal(a) >= b;
  421. end;
  422. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  423. begin
  424. Result := a >= Cardinal(b);
  425. end;
  426. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  427. begin
  428. Result := (b > a);
  429. end;
  430. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  431. begin
  432. Result := Cardinal(a) < b;
  433. end;
  434. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  435. begin
  436. Result := a < Cardinal(b);
  437. end;
  438. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  439. begin
  440. Result := (b >= a);
  441. end;
  442. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  443. begin
  444. Result := Cardinal(a) <= b;
  445. end;
  446. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  447. begin
  448. Result := a <= Cardinal(b);
  449. end;
  450. var
  451. CollationTable : array of PUCA_DataBook;
  452. function IndexOfCollation(const AName : string) : Integer;
  453. var
  454. i, c : Integer;
  455. p : Pointer;
  456. begin
  457. c := Length(AName);
  458. p := @AName[1];
  459. for i := 0 to Length(CollationTable) - 1 do begin
  460. if (Length(CollationTable[i]^.CollationName) = c) and
  461. (CompareByte((CollationTable[i]^.CollationName[1]),p^,c)=0)
  462. then
  463. exit(i);
  464. end;
  465. Result := -1;
  466. end;
  467. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  468. var
  469. i : Integer;
  470. begin
  471. Result := (IndexOfCollation(ACollation^.CollationName) = -1);
  472. if Result then begin
  473. i := Length(CollationTable);
  474. SetLength(CollationTable,(i+1));
  475. CollationTable[i] := ACollation;
  476. end;
  477. end;
  478. function UnregisterCollation(const AName : ansistring): Boolean;
  479. var
  480. i, c : Integer;
  481. begin
  482. i := IndexOfCollation(AName);
  483. Result := (i >= 0);
  484. if Result then begin
  485. c := Length(CollationTable);
  486. if (c = 1) then begin
  487. SetLength(CollationTable,0);
  488. end else begin
  489. CollationTable[i] := CollationTable[c-1];
  490. SetLength(CollationTable,(c-1));
  491. end;
  492. end;
  493. end;
  494. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  495. var
  496. i : Integer;
  497. begin
  498. i := IndexOfCollation(AName);
  499. if (i = -1) then
  500. Result := nil
  501. else
  502. Result := CollationTable[i];
  503. end;
  504. function GetCollationCount() : Integer;
  505. begin
  506. Result := Length(CollationTable);
  507. end;
  508. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  509. begin
  510. if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
  511. Result := nil
  512. else
  513. Result := CollationTable[AIndex];
  514. end;
  515. procedure PrepareCollation(
  516. ACollation : PUCA_DataBook;
  517. const ABaseName : ansistring;
  518. const AChangedFields : TCollationFields
  519. );
  520. var
  521. s : ansistring;
  522. p, base : PUCA_DataBook;
  523. begin
  524. if (ABaseName <> '') then
  525. s := ABaseName
  526. else
  527. s := ROOT_COLLATION_NAME;
  528. p := ACollation;
  529. base := FindCollation(s);
  530. if (base = nil) then
  531. Error(reCodesetConversion);
  532. p^.Base := base;
  533. if not(TCollationField.BackWard in AChangedFields) then
  534. p^.Backwards := base^.Backwards;
  535. if not(TCollationField.VariableLowLimit in AChangedFields) then
  536. p^.VariableLowLimit := base^.VariableLowLimit;
  537. if not(TCollationField.VariableHighLimit in AChangedFields) then
  538. p^.VariableLowLimit := base^.VariableHighLimit;
  539. end;
  540. {$INCLUDE unicodedata.inc}
  541. {$IFDEF ENDIAN_LITTLE}
  542. {$INCLUDE unicodedata_le.inc}
  543. {$ENDIF ENDIAN_LITTLE}
  544. {$IFDEF ENDIAN_BIG}
  545. {$INCLUDE unicodedata_be.inc}
  546. {$ENDIF ENDIAN_BIG}
  547. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  548. begin
  549. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  550. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  551. end;
  552. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  553. begin
  554. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  555. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  556. end;
  557. function UnicodeIsSurrogatePair(
  558. const AHighSurrogate,
  559. ALowSurrogate : UnicodeChar
  560. ) : Boolean;
  561. begin
  562. Result :=
  563. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  564. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  565. ) and
  566. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  567. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  568. )
  569. end;
  570. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  571. begin
  572. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  573. (Word(AValue) <= HIGH_SURROGATE_END);
  574. end;
  575. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  576. begin
  577. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  578. (Word(AValue) <= LOW_SURROGATE_END);
  579. end;
  580. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  581. begin
  582. Result:=
  583. @UC_PROP_ARRAY[
  584. UC_TABLE_3[
  585. UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
  586. [lo(ACodePoint) shr 4]
  587. ][lo(ACodePoint) and $F]
  588. ]; {
  589. @UC_PROP_ARRAY[
  590. UC_TABLE_2[
  591. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  592. WordRec(ACodePoint).Lo
  593. ]
  594. ];}
  595. end;
  596. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  597. begin
  598. Result:=
  599. @UC_PROP_ARRAY[
  600. UCO_TABLE_3[
  601. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  602. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  603. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  604. ]; {
  605. Result:=
  606. @UC_PROP_ARRAY[
  607. UCO_TABLE_2[
  608. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  609. Word(ALowS) - LOW_SURROGATE_BEGIN
  610. ]
  611. ]; }
  612. end;
  613. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  614. var
  615. l, h : UnicodeChar;
  616. begin
  617. if (ACodePoint <= High(Word)) then
  618. exit(GetProps(Word(ACodePoint)));
  619. FromUCS4(ACodePoint,h,l);
  620. Result := GetProps(h,l);
  621. end;
  622. //----------------------------------------------------------------------
  623. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  624. const
  625. SBase = $AC00;
  626. LBase = $1100;
  627. VBase = $1161;
  628. TBase = $11A7;
  629. LCount = 19;
  630. VCount = 21;
  631. TCount = 28;
  632. NCount = VCount * TCount; // 588
  633. SCount = LCount * NCount; // 11172
  634. var
  635. SIndex, L, V, T : Integer;
  636. begin
  637. SIndex := AChar - SBase;
  638. if (SIndex < 0) or (SIndex >= SCount) then begin
  639. ABuffer^ := AChar;
  640. exit(1);
  641. end;
  642. L := LBase + SIndex div NCount;
  643. V := VBase + (SIndex mod NCount) div TCount;
  644. T := TBase + SIndex mod TCount;
  645. ABuffer[0] := L;
  646. ABuffer[1] := V;
  647. Result := 2;
  648. if (T <> TBase) then begin
  649. ABuffer[2] := T;
  650. Inc(Result);
  651. end;
  652. end;
  653. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  654. var
  655. locStack : array[0..23] of Cardinal;
  656. locStackIdx : Integer;
  657. ResultBuffer : array[0..23] of Cardinal;
  658. ResultIdx : Integer;
  659. procedure AddCompositionToStack(const AIndex : Integer);
  660. var
  661. pdecIdx : ^TDecompositionIndexRec;
  662. k, kc : Integer;
  663. pu : ^UInt24;
  664. begin
  665. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  666. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  667. kc := pdecIdx^.Length;
  668. Inc(pu,kc);
  669. for k := 1 to kc do begin
  670. Dec(pu);
  671. locStack[locStackIdx + k] := pu^;
  672. end;
  673. locStackIdx := locStackIdx + kc;
  674. end;
  675. procedure AddResult(const AChar : Cardinal);inline;
  676. begin
  677. Inc(ResultIdx);
  678. ResultBuffer[ResultIdx] := AChar;
  679. end;
  680. function PopStack() : Cardinal;inline;
  681. begin
  682. Result := locStack[locStackIdx];
  683. Dec(locStackIdx);
  684. end;
  685. var
  686. cu : Cardinal;
  687. decIdx : SmallInt;
  688. locIsWord : Boolean;
  689. i : Integer;
  690. p : PUnicodeChar;
  691. begin
  692. ResultIdx := -1;
  693. locStackIdx := -1;
  694. AddCompositionToStack(ADecomposeIndex);
  695. while (locStackIdx >= 0) do begin
  696. cu := PopStack();
  697. locIsWord := (cu <= MAX_WORD);
  698. if locIsWord then
  699. decIdx := GetProps(Word(cu))^.DecompositionID
  700. else
  701. decIdx := GetProps(cu)^.DecompositionID;
  702. if (decIdx = -1) then
  703. AddResult(cu)
  704. else
  705. AddCompositionToStack(decIdx);
  706. end;
  707. p := ABuffer;
  708. Result := 0;
  709. for i := 0 to ResultIdx do begin
  710. cu := ResultBuffer[i];
  711. if (cu <= MAX_WORD) then begin
  712. p[0] := UnicodeChar(Word(cu));
  713. Inc(p);
  714. end else begin
  715. FromUCS4(cu,p[0],p[1]);
  716. Inc(p,2);
  717. Inc(Result);
  718. end;
  719. end;
  720. Result := Result + ResultIdx + 1;
  721. end;
  722. procedure CanonicalOrder(var AString : UnicodeString);
  723. begin
  724. CanonicalOrder(@AString[1],Length(AString));
  725. end;
  726. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  727. var
  728. i, c : SizeInt;
  729. p, q : PUnicodeChar;
  730. locIsSurrogateP, locIsSurrogateQ : Boolean;
  731. procedure Swap();
  732. var
  733. t, t1 : UnicodeChar;
  734. begin
  735. if not locIsSurrogateP then begin
  736. if not locIsSurrogateQ then begin
  737. t := p^;
  738. p^ := q^;
  739. q^ := t;
  740. exit;
  741. end;
  742. t := p^;
  743. p[0] := q[0];
  744. p[1] := q[1];
  745. q[1] := t;
  746. exit;
  747. end;
  748. if not locIsSurrogateQ then begin
  749. t := q[0];
  750. p[2] := p[1];
  751. p[1] := p[0];
  752. p[0] := t;
  753. exit;
  754. end;
  755. t := p[0];
  756. t1 := p[1];
  757. p[0] := q[0];
  758. p[1] := q[1];
  759. q[0] := t;
  760. q[1] := t1;
  761. end;
  762. var
  763. pu : PUC_Prop;
  764. cccp, cccq : Byte;
  765. begin
  766. c := ALength;
  767. if (c < 2) then
  768. exit;
  769. p := AStr;
  770. i := 1;
  771. while (i < c) do begin
  772. pu := GetProps(Word(p^));
  773. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  774. if locIsSurrogateP then begin
  775. if (i = (c - 1)) then
  776. Break;
  777. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  778. Inc(p);
  779. Inc(i);
  780. Continue;
  781. end;
  782. pu := GetProps(p[0],p[1]);
  783. end;
  784. if (pu^.CCC > 0) then begin
  785. cccp := pu^.CCC;
  786. if locIsSurrogateP then
  787. q := p + 2
  788. else
  789. q := p + 1;
  790. pu := GetProps(Word(q^));
  791. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  792. if locIsSurrogateQ then begin
  793. if (i = c) then
  794. Break;
  795. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  796. Inc(p);
  797. Inc(i);
  798. Continue;
  799. end;
  800. pu := GetProps(q[0],q[1]);
  801. end;
  802. cccq := pu^.CCC;
  803. if (cccq > 0) and (cccp > cccq) then begin
  804. Swap();
  805. if (i > 1) then begin
  806. Dec(p);
  807. Dec(i);
  808. pu := GetProps(Word(p^));
  809. if (pu^.Category = UGC_Surrogate) then begin
  810. if (i > 1) then begin
  811. Dec(p);
  812. Dec(i);
  813. end;
  814. end;
  815. Continue;
  816. end;
  817. end;
  818. end;
  819. if locIsSurrogateP then begin
  820. Inc(p);
  821. Inc(i);
  822. end;
  823. Inc(p);
  824. Inc(i);
  825. end;
  826. end;
  827. //Canonical Decomposition
  828. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  829. begin
  830. Result := NormalizeNFD(@AString[1],Length(AString));
  831. end;
  832. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  833. const MAX_EXPAND = 3;
  834. var
  835. i, c, kc, k : SizeInt;
  836. pp, pr : PUnicodeChar;
  837. pu : PUC_Prop;
  838. locIsSurrogate : Boolean;
  839. cpArray : array[0..7] of Cardinal;
  840. cp : Cardinal;
  841. begin
  842. c := ALength;
  843. SetLength(Result,(MAX_EXPAND*c));
  844. if (c > 0) then begin
  845. pp := AStr;
  846. pr := @Result[1];
  847. i := 1;
  848. while (i <= c) do begin
  849. pu := GetProps(Word(pp^));
  850. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  851. if locIsSurrogate then begin
  852. if (i = c) then
  853. Break;
  854. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  855. pr^ := pp^;
  856. Inc(pp);
  857. Inc(pr);
  858. Inc(i);
  859. Continue;
  860. end;
  861. pu := GetProps(pp[0],pp[1]);
  862. end;
  863. if pu^.HangulSyllable then begin
  864. if locIsSurrogate then begin
  865. cp := ToUCS4(pp[0],pp[1]);
  866. Inc(pp);
  867. Inc(i);
  868. end else begin
  869. cp := Word(pp^);
  870. end;
  871. kc := DecomposeHangul(cp,@cpArray[0]);
  872. for k := 0 to kc - 1 do begin
  873. if (cpArray[k] <= MAX_WORD) then begin
  874. pr^ := UnicodeChar(Word(cpArray[k]));
  875. pr := pr + 1;
  876. end else begin
  877. FromUCS4(cpArray[k],pr[0],pr[1]);
  878. pr := pr + 2;
  879. end;
  880. end;
  881. if (kc > 0) then
  882. Dec(pr);
  883. end else begin
  884. if (pu^.DecompositionID = -1) then begin
  885. pr^ := pp^;
  886. if locIsSurrogate then begin
  887. Inc(pp);
  888. Inc(pr);
  889. Inc(i);
  890. pr^ := pp^;
  891. end;
  892. end else begin
  893. k := Decompose(pu^.DecompositionID,pr);
  894. pr := pr + (k - 1);
  895. if locIsSurrogate then begin
  896. Inc(pp);
  897. Inc(i);
  898. end;
  899. end;
  900. end;
  901. Inc(pp);
  902. Inc(pr);
  903. Inc(i);
  904. end;
  905. Dec(pp);
  906. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  907. SetLength(Result,i);
  908. CanonicalOrder(@Result[1],Length(Result));
  909. end;
  910. end;
  911. type
  912. TBitOrder = 0..7;
  913. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  914. begin
  915. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  916. end;
  917. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  918. begin
  919. if AValue then
  920. AData := AData or (1 shl (ABit mod 8))
  921. else
  922. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  923. end;
  924. { TUCA_PropItemContextTreeNodeRec }
  925. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  926. begin
  927. if (Self.Left = 0) then
  928. Result := nil
  929. else
  930. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  931. end;
  932. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  933. begin
  934. if (Self.Right = 0) then
  935. Result := nil
  936. else
  937. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  938. end;
  939. { TUCA_PropItemContextRec }
  940. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  941. begin
  942. Result := PUInt24(
  943. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  944. SizeOf(Self.WeightCount)
  945. );
  946. end;
  947. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  948. begin
  949. Result := PUCA_PropWeights(
  950. PtrUInt(@Self) +
  951. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  952. (Self.CodePointCount*SizeOf(UInt24))
  953. );
  954. end;
  955. { TUCA_PropItemContextTreeRec }
  956. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  957. begin
  958. if (Size = 0) then
  959. Result := nil
  960. else
  961. Result := PUCA_PropItemContextTreeNodeRec(
  962. PtrUInt(
  963. PtrUInt(@Self) + SizeOf(UInt24){Size}
  964. )
  965. );
  966. end;
  967. function CompareCodePoints(
  968. A : PUInt24; LA : Integer;
  969. B : PUInt24; LB : Integer
  970. ) : Integer;
  971. var
  972. i, hb : Integer;
  973. begin
  974. if (A = B) then
  975. exit(0);
  976. Result := 1;
  977. hb := LB - 1;
  978. for i := 0 to LA - 1 do begin
  979. if (i > hb) then
  980. exit;
  981. if (A[i] < B[i]) then
  982. exit(-1);
  983. if (A[i] > B[i]) then
  984. exit(1);
  985. end;
  986. if (LA = LB) then
  987. exit(0);
  988. exit(-1);
  989. end;
  990. function TUCA_PropItemContextTreeRec.Find(
  991. const AChars : PUInt24;
  992. const ACharCount : Integer;
  993. out ANode : PUCA_PropItemContextTreeNodeRec
  994. ) : Boolean;
  995. var
  996. t : PUCA_PropItemContextTreeNodeRec;
  997. begin
  998. t := Data;
  999. while (t <> nil) do begin
  1000. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  1001. 0 : Break;
  1002. -1 : t := t^.GetLeftNode();
  1003. else
  1004. t := t^.GetRightNode();
  1005. end;
  1006. end;
  1007. Result := (t <> nil);
  1008. if Result then
  1009. ANode := t;
  1010. end;
  1011. { TUC_Prop }
  1012. function TUC_Prop.GetCategory: Byte;
  1013. begin
  1014. Result := Byte((CategoryData and Byte($F8)) shr 3);
  1015. end;
  1016. function TUC_Prop.GetNumericValue: Double;
  1017. begin
  1018. Result := UC_NUMERIC_ARRAY[NumericIndex];
  1019. end;
  1020. procedure TUC_Prop.SetCategory(AValue: Byte);
  1021. begin
  1022. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  1023. end;
  1024. function TUC_Prop.GetWhiteSpace: Boolean;
  1025. begin
  1026. Result := IsBitON(CategoryData,0);
  1027. end;
  1028. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  1029. begin
  1030. SetBit(CategoryData,0,AValue);
  1031. end;
  1032. function TUC_Prop.GetHangulSyllable: Boolean;
  1033. begin
  1034. Result := IsBitON(CategoryData,1);
  1035. end;
  1036. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  1037. begin
  1038. SetBit(CategoryData,1,AValue);
  1039. end;
  1040. { TUCA_DataBook }
  1041. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  1042. begin
  1043. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  1044. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  1045. end;
  1046. { TUCA_PropItemRec }
  1047. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  1048. begin
  1049. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  1050. end;
  1051. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  1052. begin
  1053. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  1054. end;
  1055. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  1056. begin
  1057. if HasCodePoint() then begin
  1058. if Contextual then
  1059. Result := PUInt24(
  1060. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  1061. Cardinal(GetContext()^.Size)
  1062. )^
  1063. else
  1064. Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
  1065. end else begin
  1066. {$ifdef uni_debug}
  1067. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  1068. {$else uni_debug}
  1069. Result := ZERO_UINT24;
  1070. {$endif uni_debug}
  1071. end
  1072. end;
  1073. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  1074. begin
  1075. Result := IsBitON(Flags,FLAG_CODEPOINT);
  1076. end;
  1077. function TUCA_PropItemRec.IsValid() : Boolean;
  1078. begin
  1079. Result := IsBitON(Flags,FLAG_VALID);
  1080. end;
  1081. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  1082. begin
  1083. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1084. end;}
  1085. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  1086. var
  1087. c : Integer;
  1088. p : PByte;
  1089. pd : PUCA_PropWeights;
  1090. begin
  1091. c := WeightLength;
  1092. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1093. pd := ADest;
  1094. pd^.Weights[0] := PWord(p)^;
  1095. p := p + 2;
  1096. if not IsWeightCompress_1() then begin
  1097. pd^.Weights[1] := PWord(p)^;
  1098. p := p + 2;
  1099. end else begin
  1100. pd^.Weights[1] := p^;
  1101. p := p + 1;
  1102. end;
  1103. if not IsWeightCompress_2() then begin
  1104. pd^.Weights[2] := PWord(p)^;
  1105. p := p + 2;
  1106. end else begin
  1107. pd^.Weights[2] := p^;
  1108. p := p + 1;
  1109. end;
  1110. if (c > 1) then
  1111. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  1112. end;
  1113. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  1114. begin
  1115. Result := SizeOf(TUCA_PropItemRec);
  1116. if (WeightLength > 0) then begin
  1117. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  1118. if IsWeightCompress_1() then
  1119. Result := Result - 1;
  1120. if IsWeightCompress_2() then
  1121. Result := Result - 1;
  1122. end;
  1123. if HasCodePoint() then
  1124. Result := Result + SizeOf(UInt24);
  1125. if Contextual then
  1126. Result := Result + Cardinal(GetContext()^.Size);
  1127. end;
  1128. function TUCA_PropItemRec.GetContextual: Boolean;
  1129. begin
  1130. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  1131. end;
  1132. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  1133. var
  1134. p : PtrUInt;
  1135. begin
  1136. if not Contextual then
  1137. exit(nil);
  1138. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  1139. if IsBitON(Flags,FLAG_CODEPOINT) then
  1140. p := p + SizeOf(UInt24);
  1141. Result := PUCA_PropItemContextTreeRec(p);
  1142. end;
  1143. function TUCA_PropItemRec.IsDeleted() : Boolean;
  1144. begin
  1145. Result := IsBitON(Flags,FLAG_DELETION);
  1146. end;
  1147. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  1148. var
  1149. i : Cardinal;
  1150. begin
  1151. if (ABook^.BMP_Table2 = nil) then
  1152. exit(nil);
  1153. i := ABook^.BMP_Table2[
  1154. (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
  1155. Lo(Word(AChar))
  1156. ];
  1157. if (i > 0) then
  1158. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1159. else
  1160. Result := nil;
  1161. end;
  1162. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  1163. var
  1164. i : Cardinal;
  1165. begin
  1166. if (ABook^.OBMP_Table2 = nil) then
  1167. exit(nil);
  1168. i := ABook^.OBMP_Table2[
  1169. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1170. Word(ALowS) - LOW_SURROGATE_BEGIN
  1171. ];
  1172. if (i > 0) then
  1173. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1174. else
  1175. Result := nil;
  1176. end;
  1177. {$include weight_derivation.inc}
  1178. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  1179. var
  1180. bb : TUCASortKey;
  1181. begin
  1182. SetLength(bb,Length(B));
  1183. if (Length(bb) > 0) then
  1184. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  1185. Result := CompareSortKey(A,bb);
  1186. end;
  1187. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  1188. var
  1189. i, hb : Integer;
  1190. begin
  1191. if (Pointer(A) = Pointer(B)) then
  1192. exit(0);
  1193. Result := 1;
  1194. hb := Length(B) - 1;
  1195. for i := 0 to Length(A) - 1 do begin
  1196. if (i > hb) then
  1197. exit;
  1198. if (A[i] < B[i]) then
  1199. exit(-1);
  1200. if (A[i] > B[i]) then
  1201. exit(1);
  1202. end;
  1203. if (Length(A) = Length(B)) then
  1204. exit(0);
  1205. exit(-1);
  1206. end;
  1207. type
  1208. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  1209. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1210. var
  1211. r : TUCASortKey;
  1212. i, c, k, ral, levelCount : Integer;
  1213. pce : PUCA_PropWeights;
  1214. begin
  1215. c := Length(ACEList);
  1216. if (c = 0) then
  1217. exit(nil);
  1218. levelCount := Length(ACEList[0].Weights);
  1219. SetLength(r,(levelCount*c + levelCount));
  1220. ral := 0;
  1221. for i := 0 to levelCount - 1 do begin
  1222. if not ACollation^.Backwards[i] then begin
  1223. pce := @ACEList[0];
  1224. for k := 0 to c - 1 do begin
  1225. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1226. r[ral] := pce^.Weights[i];
  1227. ral := ral + 1;
  1228. end;
  1229. pce := pce + 1;
  1230. end;
  1231. end else begin
  1232. pce := @ACEList[c-1];
  1233. for k := 0 to c - 1 do begin
  1234. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1235. r[ral] := pce^.Weights[i];
  1236. ral := ral + 1;
  1237. end;
  1238. pce := pce - 1;
  1239. end;
  1240. end;
  1241. r[ral] := 0;
  1242. ral := ral + 1;
  1243. end;
  1244. ral := ral - 1;
  1245. SetLength(r,ral);
  1246. Result := r;
  1247. end;
  1248. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1249. var
  1250. r : TUCASortKey;
  1251. i, c, k, ral, levelCount : Integer;
  1252. pce : PUCA_PropWeights;
  1253. begin
  1254. c := Length(ACEList);
  1255. if (c = 0) then
  1256. exit(nil);
  1257. levelCount := Length(ACEList[0].Weights);
  1258. SetLength(r,(levelCount*c + levelCount));
  1259. ral := 0;
  1260. for i := 0 to levelCount - 1 do begin
  1261. if not ACollation^.Backwards[i] then begin
  1262. pce := @ACEList[0];
  1263. for k := 0 to c - 1 do begin
  1264. if (pce^.Weights[i] <> 0) then begin
  1265. r[ral] := pce^.Weights[i];
  1266. ral := ral + 1;
  1267. end;
  1268. pce := pce + 1;
  1269. end;
  1270. end else begin
  1271. pce := @ACEList[c-1];
  1272. for k := 0 to c - 1 do begin
  1273. if (pce^.Weights[i] <> 0) then begin
  1274. r[ral] := pce^.Weights[i];
  1275. ral := ral + 1;
  1276. end;
  1277. pce := pce - 1;
  1278. end;
  1279. end;
  1280. r[ral] := 0;
  1281. ral := ral + 1;
  1282. end;
  1283. ral := ral - 1;
  1284. SetLength(r,ral);
  1285. Result := r;
  1286. end;
  1287. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1288. var
  1289. r : TUCASortKey;
  1290. i, c, k, ral, levelCount : Integer;
  1291. pce : PUCA_PropWeights;
  1292. variableState : Boolean;
  1293. begin
  1294. c := Length(ACEList);
  1295. if (c = 0) then
  1296. exit(nil);
  1297. levelCount := Length(ACEList[0].Weights);
  1298. SetLength(r,(levelCount*c + levelCount));
  1299. ral := 0;
  1300. for i := 0 to levelCount - 1 do begin
  1301. if not ACollation^.Backwards[i] then begin
  1302. variableState := False;
  1303. pce := @ACEList[0];
  1304. for k := 0 to c - 1 do begin
  1305. if not ACollation^.IsVariable(pce) then begin
  1306. if (pce^.Weights[0] <> 0) then
  1307. variableState := False;
  1308. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1309. r[ral] := pce^.Weights[i];
  1310. ral := ral + 1;
  1311. end;
  1312. end else begin
  1313. variableState := True;
  1314. end;
  1315. pce := pce + 1;
  1316. end;
  1317. end else begin
  1318. pce := @ACEList[c-1];
  1319. for k := 0 to c - 1 do begin
  1320. if not ACollation^.IsVariable(pce) then begin
  1321. if (pce^.Weights[0] <> 0) then
  1322. variableState := False;
  1323. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1324. r[ral] := pce^.Weights[i];
  1325. ral := ral + 1;
  1326. end;
  1327. end else begin
  1328. variableState := True;
  1329. end;
  1330. pce := pce - 1;
  1331. end;
  1332. end;
  1333. r[ral] := 0;
  1334. ral := ral + 1;
  1335. end;
  1336. ral := ral - 1;
  1337. SetLength(r,ral);
  1338. Result := r;
  1339. end;
  1340. function FormKeyShiftedTrimmed(
  1341. const ACEList : TUCA_PropWeightsArray;
  1342. const ACollation : PUCA_DataBook
  1343. ) : TUCASortKey;
  1344. var
  1345. i : Integer;
  1346. p : ^TUCASortKeyItem;
  1347. begin
  1348. Result := FormKeyShifted(ACEList,ACollation);
  1349. i := Length(Result) - 1;
  1350. if (i >= 0) then begin
  1351. p := @Result[i];
  1352. while (i >= 0) do begin
  1353. if (p^ <> $FFFF) then
  1354. Break;
  1355. Dec(i);
  1356. Dec(p);
  1357. end;
  1358. if ((i+1) < Length(Result)) then
  1359. SetLength(Result,(i+1));
  1360. end;
  1361. end;
  1362. function FindChild(
  1363. const ACodePoint : Cardinal;
  1364. const AParent : PUCA_PropItemRec
  1365. ) : PUCA_PropItemRec;inline;
  1366. var
  1367. k : Integer;
  1368. begin
  1369. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  1370. for k := 0 to AParent^.ChildCount - 1 do begin
  1371. if (ACodePoint = Result^.CodePoint) then
  1372. exit;
  1373. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  1374. end;
  1375. Result := nil;
  1376. end;
  1377. function ComputeSortKey(
  1378. const AString : UnicodeString;
  1379. const ACollation : PUCA_DataBook
  1380. ) : TUCASortKey;
  1381. begin
  1382. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  1383. end;
  1384. function ComputeRawSortKey(
  1385. const AStr : PUnicodeChar;
  1386. const ALength : SizeInt;
  1387. const ACollation : PUCA_DataBook
  1388. ) : TUCA_PropWeightsArray;
  1389. var
  1390. r : TUCA_PropWeightsArray;
  1391. ral {used length of "r"}: Integer;
  1392. rl {capacity of "r"} : Integer;
  1393. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1394. begin
  1395. if (rl < AMinGrow) then
  1396. rl := rl + AMinGrow
  1397. else
  1398. rl := 2 * rl;
  1399. SetLength(r,rl);
  1400. end;
  1401. var
  1402. i : Integer;
  1403. s : UnicodeString;
  1404. ps : PUnicodeChar;
  1405. cp : Cardinal;
  1406. cl : PUCA_DataBook;
  1407. pp : PUCA_PropItemRec;
  1408. ppLevel : Byte;
  1409. removedCharIndex : array of DWord;
  1410. removedCharIndexLength : DWord;
  1411. locHistory : array[0..24] of record
  1412. i : Integer;
  1413. cl : PUCA_DataBook;
  1414. pp : PUCA_PropItemRec;
  1415. ppLevel : Byte;
  1416. cp : Cardinal;
  1417. removedCharIndexLength : DWord;
  1418. end;
  1419. locHistoryTop : Integer;
  1420. suppressState : record
  1421. cl : PUCA_DataBook;
  1422. CharCount : Integer;
  1423. end;
  1424. LastKeyOwner : record
  1425. Length : Integer;
  1426. Chars : array[0..24] of UInt24;
  1427. end;
  1428. procedure SaveKeyOwner();
  1429. var
  1430. k : Integer;
  1431. kppLevel : Byte;
  1432. begin
  1433. k := 0;
  1434. kppLevel := High(Byte);
  1435. while (k <= locHistoryTop) do begin
  1436. if (kppLevel <> locHistory[k].ppLevel) then begin
  1437. LastKeyOwner.Chars[k] := locHistory[k].cp;
  1438. kppLevel := locHistory[k].ppLevel;
  1439. end;
  1440. k := k + 1;
  1441. end;
  1442. if (k = 0) or (kppLevel <> ppLevel) then begin
  1443. LastKeyOwner.Chars[k] := cp;
  1444. k := k + 1;
  1445. end;
  1446. LastKeyOwner.Length := k;
  1447. end;
  1448. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1449. begin
  1450. SaveKeyOwner();
  1451. if ((ral + AItem^.WeightLength) > rl) then
  1452. GrowKey(AItem^.WeightLength);
  1453. AItem^.GetWeightArray(@r[ral]);
  1454. ral := ral + AItem^.WeightLength;
  1455. end;
  1456. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  1457. begin
  1458. if ((ral + AItem^.WeightCount) > rl) then
  1459. GrowKey(AItem^.WeightCount);
  1460. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  1461. ral := ral + AItem^.WeightCount;
  1462. end;
  1463. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1464. begin
  1465. SaveKeyOwner();
  1466. if ((ral + 2) > rl) then
  1467. GrowKey();
  1468. DeriveWeight(ACodePoint,@r[ral]);
  1469. ral := ral + 2;
  1470. end;
  1471. procedure RecordDeletion();inline;
  1472. begin
  1473. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  1474. if (suppressState.cl = nil) or
  1475. (suppressState.CharCount > ppLevel)
  1476. then begin
  1477. suppressState.cl := cl;
  1478. suppressState.CharCount := ppLevel;
  1479. end;
  1480. end;
  1481. end;
  1482. procedure RecordStep();inline;
  1483. begin
  1484. Inc(locHistoryTop);
  1485. locHistory[locHistoryTop].i := i;
  1486. locHistory[locHistoryTop].cl := cl;
  1487. locHistory[locHistoryTop].pp := pp;
  1488. locHistory[locHistoryTop].ppLevel := ppLevel;
  1489. locHistory[locHistoryTop].cp := cp;
  1490. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1491. RecordDeletion();
  1492. end;
  1493. procedure ClearHistory();inline;
  1494. begin
  1495. locHistoryTop := -1;
  1496. end;
  1497. function HasHistory() : Boolean;inline;
  1498. begin
  1499. Result := (locHistoryTop >= 0);
  1500. end;
  1501. function GetHistoryLength() : Integer;inline;
  1502. begin
  1503. Result := (locHistoryTop + 1);
  1504. end;
  1505. procedure GoBack();inline;
  1506. begin
  1507. Assert(locHistoryTop >= 0);
  1508. i := locHistory[locHistoryTop].i;
  1509. cp := locHistory[locHistoryTop].cp;
  1510. cl := locHistory[locHistoryTop].cl;
  1511. pp := locHistory[locHistoryTop].pp;
  1512. ppLevel := locHistory[locHistoryTop].ppLevel;
  1513. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1514. ps := @s[i];
  1515. Dec(locHistoryTop);
  1516. end;
  1517. var
  1518. c : Integer;
  1519. lastUnblockedNonstarterCCC : Byte;
  1520. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1521. var
  1522. k : DWord;
  1523. pk : PUnicodeChar;
  1524. puk : PUC_Prop;
  1525. begin
  1526. k := AStartFrom;
  1527. if (k > c) then
  1528. exit(False);
  1529. if (removedCharIndexLength>0) and
  1530. (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0)
  1531. then begin
  1532. exit(False);
  1533. end;
  1534. {if (k = (i+1)) or
  1535. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1536. then
  1537. lastUnblockedNonstarterCCC := 0;}
  1538. pk := @s[k];
  1539. if UnicodeIsHighSurrogate(pk^) then begin
  1540. if (k = c) then
  1541. exit(False);
  1542. if UnicodeIsLowSurrogate(pk[1]) then
  1543. puk := GetProps(pk[0],pk[1])
  1544. else
  1545. puk := GetProps(Word(pk^));
  1546. end else begin
  1547. puk := GetProps(Word(pk^));
  1548. end;
  1549. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  1550. exit(False);
  1551. lastUnblockedNonstarterCCC := puk^.CCC;
  1552. Result := True;
  1553. end;
  1554. procedure RemoveChar(APos : Integer);inline;
  1555. begin
  1556. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1557. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1558. removedCharIndex[removedCharIndexLength] := APos;
  1559. Inc(removedCharIndexLength);
  1560. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  1561. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1562. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1563. removedCharIndex[removedCharIndexLength] := APos+1;
  1564. Inc(removedCharIndexLength);
  1565. end;
  1566. end;
  1567. procedure Inc_I();inline;
  1568. begin
  1569. if (removedCharIndexLength = 0) then begin
  1570. Inc(i);
  1571. Inc(ps);
  1572. exit;
  1573. end;
  1574. while True do begin
  1575. Inc(i);
  1576. Inc(ps);
  1577. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  1578. Break;
  1579. end;
  1580. end;
  1581. var
  1582. surrogateState : Boolean;
  1583. function MoveToNextChar() : Boolean;inline;
  1584. begin
  1585. Result := True;
  1586. if UnicodeIsHighSurrogate(ps[0]) then begin
  1587. if (i = c) then
  1588. exit(False);
  1589. if UnicodeIsLowSurrogate(ps[1]) then begin
  1590. surrogateState := True;
  1591. cp := ToUCS4(ps[0],ps[1]);
  1592. end else begin
  1593. surrogateState := False;
  1594. cp := Word(ps[0]);
  1595. end;
  1596. end else begin
  1597. surrogateState := False;
  1598. cp := Word(ps[0]);
  1599. end;
  1600. end;
  1601. procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
  1602. begin
  1603. cl := nil;
  1604. pp := nil;
  1605. ppLevel := 0;
  1606. if AClearSuppressInfo then begin
  1607. suppressState.cl := nil;
  1608. suppressState.CharCount := 0;
  1609. end;
  1610. end;
  1611. function FindPropUCA() : Boolean;
  1612. var
  1613. candidateCL : PUCA_DataBook;
  1614. begin
  1615. pp := nil;
  1616. if (cl = nil) then
  1617. candidateCL := ACollation
  1618. else
  1619. candidateCL := cl;
  1620. if surrogateState then begin
  1621. while (candidateCL <> nil) do begin
  1622. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  1623. if (pp <> nil) then
  1624. break;
  1625. candidateCL := candidateCL^.Base;
  1626. end;
  1627. end else begin
  1628. while (candidateCL <> nil) do begin
  1629. pp := GetPropUCA(ps[0],candidateCL);
  1630. if (pp <> nil) then
  1631. break;
  1632. candidateCL := candidateCL^.Base;
  1633. end;
  1634. end;
  1635. cl := candidateCL;
  1636. Result := (pp <> nil);
  1637. end;
  1638. procedure AddWeightsAndClear();inline;
  1639. var
  1640. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1641. begin
  1642. if (pp^.WeightLength > 0) then begin
  1643. AddWeights(pp);
  1644. end else
  1645. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1646. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1647. (ctxNode^.Data.WeightCount > 0)
  1648. then begin
  1649. AddContextWeights(@ctxNode^.Data);
  1650. end;
  1651. //AddWeights(pp);
  1652. ClearHistory();
  1653. ClearPP();
  1654. end;
  1655. procedure StartMatch();
  1656. procedure HandleLastChar();
  1657. var
  1658. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1659. begin
  1660. while True do begin
  1661. if pp^.IsValid() then begin
  1662. if (pp^.WeightLength > 0) then
  1663. AddWeights(pp)
  1664. else
  1665. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1666. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1667. (ctxNode^.Data.WeightCount > 0)
  1668. then
  1669. AddContextWeights(@ctxNode^.Data)
  1670. else
  1671. AddComputedWeights(cp){handle deletion of code point};
  1672. break;
  1673. end;
  1674. if (cl^.Base = nil) then begin
  1675. AddComputedWeights(cp);
  1676. break;
  1677. end;
  1678. cl := cl^.Base;
  1679. if not FindPropUCA() then begin
  1680. AddComputedWeights(cp);
  1681. break;
  1682. end;
  1683. end;
  1684. end;
  1685. var
  1686. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  1687. begin
  1688. ppLevel := 0;
  1689. if not FindPropUCA() then begin
  1690. AddComputedWeights(cp);
  1691. ClearHistory();
  1692. ClearPP();
  1693. end else begin
  1694. if (i = c) then begin
  1695. HandleLastChar();
  1696. end else begin
  1697. if pp^.IsValid()then begin
  1698. if (pp^.ChildCount = 0) then begin
  1699. if (pp^.WeightLength > 0) then
  1700. AddWeights(pp)
  1701. else
  1702. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1703. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  1704. (tmpCtxNode^.Data.WeightCount > 0)
  1705. then
  1706. AddContextWeights(@tmpCtxNode^.Data)
  1707. else
  1708. AddComputedWeights(cp){handle deletion of code point};
  1709. ClearPP();
  1710. ClearHistory();
  1711. end else begin
  1712. RecordStep();
  1713. end
  1714. end else begin
  1715. if (pp^.ChildCount = 0) then begin
  1716. AddComputedWeights(cp);
  1717. ClearPP();
  1718. ClearHistory();
  1719. end else begin
  1720. RecordStep();
  1721. end;
  1722. end ;
  1723. end;
  1724. end;
  1725. end;
  1726. function TryPermutation() : Boolean;
  1727. var
  1728. kk : Integer;
  1729. b : Boolean;
  1730. puk : PUC_Prop;
  1731. ppk : PUCA_PropItemRec;
  1732. begin
  1733. Result := False;
  1734. puk := GetProps(cp);
  1735. if (puk^.CCC = 0) then
  1736. exit;
  1737. lastUnblockedNonstarterCCC := puk^.CCC;
  1738. if surrogateState then
  1739. kk := i + 2
  1740. else
  1741. kk := i + 1;
  1742. while IsUnblockedNonstarter(kk) do begin
  1743. b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
  1744. if b then
  1745. ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
  1746. else
  1747. ppk := FindChild(Word(s[kk]),pp);
  1748. if (ppk <> nil) then begin
  1749. pp := ppk;
  1750. RemoveChar(kk);
  1751. Inc(ppLevel);
  1752. RecordStep();
  1753. Result := True;
  1754. if (pp^.ChildCount = 0 ) then
  1755. Break;
  1756. end;
  1757. if b then
  1758. Inc(kk);
  1759. Inc(kk);
  1760. end;
  1761. end;
  1762. procedure AdvanceCharPos();inline;
  1763. begin
  1764. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  1765. Inc(i);
  1766. Inc(ps);
  1767. end;
  1768. Inc_I();
  1769. end;
  1770. var
  1771. ok : Boolean;
  1772. pp1 : PUCA_PropItemRec;
  1773. cltemp : PUCA_DataBook;
  1774. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1775. begin
  1776. if (ALength = 0) then
  1777. exit(nil);
  1778. c := ALength;
  1779. s := NormalizeNFD(AStr,c);
  1780. c := Length(s);
  1781. rl := 3*c;
  1782. SetLength(r,rl);
  1783. ral := 0;
  1784. ps := @s[1];
  1785. ClearPP();
  1786. locHistoryTop := -1;
  1787. removedCharIndexLength := 0;
  1788. FillByte(suppressState,SizeOf(suppressState),0);
  1789. LastKeyOwner.Length := 0;
  1790. i := 1;
  1791. while (i <= c) and MoveToNextChar() do begin
  1792. if (pp = nil) then begin // Start Matching
  1793. StartMatch();
  1794. end else begin
  1795. pp1 := FindChild(cp,pp);
  1796. if (pp1 <> nil) then begin
  1797. Inc(ppLevel);
  1798. pp := pp1;
  1799. if (pp^.ChildCount = 0) or (i = c) then begin
  1800. ok := False;
  1801. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  1802. if (pp^.WeightLength > 0) then begin
  1803. AddWeightsAndClear();
  1804. ok := True;
  1805. end else
  1806. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1807. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1808. (ctxNode^.Data.WeightCount > 0)
  1809. then begin
  1810. AddContextWeights(@ctxNode^.Data);
  1811. ClearHistory();
  1812. ClearPP();
  1813. ok := True;
  1814. end
  1815. end;
  1816. if not ok then begin
  1817. RecordDeletion();
  1818. ok := False;
  1819. while HasHistory() do begin
  1820. GoBack();
  1821. if pp^.IsValid() and
  1822. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  1823. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  1824. )
  1825. then begin
  1826. AddWeightsAndClear();
  1827. ok := True;
  1828. Break;
  1829. end;
  1830. end;
  1831. if not ok then begin
  1832. cltemp := cl^.Base;
  1833. if (cltemp <> nil) then begin
  1834. ClearPP(False);
  1835. cl := cltemp;
  1836. Continue;
  1837. end;
  1838. end;
  1839. if not ok then begin
  1840. AddComputedWeights(cp);
  1841. ClearHistory();
  1842. ClearPP();
  1843. end;
  1844. end;
  1845. end else begin
  1846. RecordStep();
  1847. end;
  1848. end else begin
  1849. // permutations !
  1850. ok := False;
  1851. if TryPermutation() and pp^.IsValid() then begin
  1852. if (suppressState.CharCount = 0) then begin
  1853. AddWeightsAndClear();
  1854. Continue;
  1855. end;
  1856. while True do begin
  1857. if pp^.IsValid() and
  1858. (pp^.WeightLength > 0) and
  1859. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  1860. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  1861. )
  1862. then begin
  1863. AddWeightsAndClear();
  1864. ok := True;
  1865. break;
  1866. end;
  1867. if not HasHistory() then
  1868. break;
  1869. GoBack();
  1870. if (pp = nil) then
  1871. break;
  1872. end;
  1873. end;
  1874. if not ok then begin
  1875. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  1876. if (pp^.WeightLength > 0) then begin
  1877. AddWeightsAndClear();
  1878. ok := True;
  1879. end else
  1880. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1881. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1882. (ctxNode^.Data.WeightCount > 0)
  1883. then begin
  1884. AddContextWeights(@ctxNode^.Data);
  1885. ClearHistory();
  1886. ClearPP();
  1887. ok := True;
  1888. end
  1889. end;
  1890. if ok then
  1891. Continue;
  1892. end;
  1893. if not ok then begin
  1894. if (cl^.Base <> nil) then begin
  1895. cltemp := cl^.Base;
  1896. while HasHistory() do
  1897. GoBack();
  1898. pp := nil;
  1899. ppLevel := 0;
  1900. cl := cltemp;
  1901. Continue;
  1902. end;
  1903. //walk back
  1904. ok := False;
  1905. while HasHistory() do begin
  1906. GoBack();
  1907. if pp^.IsValid() and
  1908. (pp^.WeightLength > 0) and
  1909. ( (suppressState.CharCount = 0) or
  1910. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  1911. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  1912. )
  1913. )
  1914. then begin
  1915. AddWeightsAndClear();
  1916. ok := True;
  1917. Break;
  1918. end;
  1919. end;
  1920. if ok then begin
  1921. AdvanceCharPos();
  1922. Continue;
  1923. end;
  1924. if (pp <> nil) then begin
  1925. AddComputedWeights(cp);
  1926. ClearHistory();
  1927. ClearPP();
  1928. end;
  1929. end;
  1930. end;
  1931. end;
  1932. if surrogateState then begin
  1933. Inc(ps);
  1934. Inc(i);
  1935. end;
  1936. //
  1937. Inc_I();
  1938. end;
  1939. SetLength(r,ral);
  1940. Result := r;
  1941. end;
  1942. function ComputeSortKey(
  1943. const AStr : PUnicodeChar;
  1944. const ALength : SizeInt;
  1945. const ACollation : PUCA_DataBook
  1946. ) : TUCASortKey;
  1947. var
  1948. r : TUCA_PropWeightsArray;
  1949. begin
  1950. r := ComputeRawSortKey(AStr,ALength,ACollation);
  1951. case ACollation^.VariableWeight of
  1952. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(r,ACollation);
  1953. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(r,ACollation);
  1954. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(r,ACollation);
  1955. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(r,ACollation);
  1956. else
  1957. Result := FormKeyShifted(r,ACollation);
  1958. end;
  1959. end;
  1960. end.