unicodedata.pas 58 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. unit unicodedata;
  17. {$mode delphi}
  18. {$H+}
  19. {$PACKENUM 1}
  20. {$SCOPEDENUMS ON}
  21. {$pointermath on}
  22. {$define USE_INLINE}
  23. { $define uni_debug}
  24. interface
  25. uses
  26. SysUtils;
  27. const
  28. MAX_WORD = High(Word);
  29. LOW_SURROGATE_BEGIN = Word($DC00);
  30. LOW_SURROGATE_END = Word($DFFF);
  31. HIGH_SURROGATE_BEGIN = Word($D800);
  32. HIGH_SURROGATE_END = Word($DBFF);
  33. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  34. UCS4_HALF_BASE = LongWord($10000);
  35. UCS4_HALF_MASK = Word($3FF);
  36. MAX_LEGAL_UTF32 = $10FFFF;
  37. const
  38. // Unicode General Category
  39. UGC_UppercaseLetter = 0;
  40. UGC_LowercaseLetter = 1;
  41. UGC_TitlecaseLetter = 2;
  42. UGC_ModifierLetter = 3;
  43. UGC_OtherLetter = 4;
  44. UGC_NonSpacingMark = 5;
  45. UGC_CombiningMark = 6;
  46. UGC_EnclosingMark = 7;
  47. UGC_DecimalNumber = 8;
  48. UGC_LetterNumber = 9;
  49. UGC_OtherNumber = 10;
  50. UGC_ConnectPunctuation = 11;
  51. UGC_DashPunctuation = 12;
  52. UGC_OpenPunctuation = 13;
  53. UGC_ClosePunctuation = 14;
  54. UGC_InitialPunctuation = 15;
  55. UGC_FinalPunctuation = 16;
  56. UGC_OtherPunctuation = 17;
  57. UGC_MathSymbol = 18;
  58. UGC_CurrencySymbol = 19;
  59. UGC_ModifierSymbol = 20;
  60. UGC_OtherSymbol = 21;
  61. UGC_SpaceSeparator = 22;
  62. UGC_LineSeparator = 23;
  63. UGC_ParagraphSeparator = 24;
  64. UGC_Control = 25;
  65. UGC_Format = 26;
  66. UGC_Surrogate = 27;
  67. UGC_PrivateUse = 28;
  68. UGC_Unassigned = 29;
  69. type
  70. TUInt24Rec = packed record
  71. public
  72. {$ifdef FPC_LITTLE_ENDIAN}
  73. byte0, byte1, byte2 : Byte;
  74. {$else FPC_LITTLE_ENDIAN}
  75. byte2, byte1, byte0 : Byte;
  76. {$endif FPC_LITTLE_ENDIAN}
  77. public
  78. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  79. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  80. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  81. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  82. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  83. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  84. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  85. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  86. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  87. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  88. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  89. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  90. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  91. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  92. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  93. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  94. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  95. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  96. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  97. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  98. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  99. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  100. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  101. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  102. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  103. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  104. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  105. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  106. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  107. end;
  108. UInt24 = TUInt24Rec;
  109. PUInt24 = ^UInt24;
  110. const
  111. ZERO_UINT24 : UInt24 =
  112. {$ifdef FPC_LITTLE_ENDIAN}
  113. (byte0 : 0; byte1 : 0; byte2 : 0;);
  114. {$else FPC_LITTLE_ENDIAN}
  115. (byte2 : 0; byte1 : 0; byte0 : 0;);
  116. {$endif FPC_LITTLE_ENDIAN}
  117. type
  118. EUnicodeException = class(Exception)
  119. end;
  120. PUC_Prop = ^TUC_Prop;
  121. { TUC_Prop }
  122. TUC_Prop = packed record
  123. private
  124. function GetCategory : Byte;inline;
  125. procedure SetCategory(AValue : Byte);
  126. function GetWhiteSpace : Boolean;inline;
  127. procedure SetWhiteSpace(AValue : Boolean);
  128. function GetHangulSyllable : Boolean;inline;
  129. procedure SetHangulSyllable(AValue : Boolean);
  130. function GetNumericValue: Double;inline;
  131. public
  132. CategoryData : Byte;
  133. public
  134. CCC : Byte;
  135. NumericIndex : Byte;
  136. SimpleUpperCase : UInt24;
  137. SimpleLowerCase : UInt24;
  138. DecompositionID : SmallInt;
  139. public
  140. property Category : Byte read GetCategory write SetCategory;
  141. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  142. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  143. property NumericValue : Double read GetNumericValue;
  144. end;
  145. const
  146. BIT_POS_VALIDE = 0;
  147. type
  148. TWeightLength = 0..24;
  149. TUCA_PropWeights = packed record
  150. Weights : array[0..2] of Word;
  151. end;
  152. PUCA_PropWeights = ^TUCA_PropWeights;
  153. TUCA_PropItemContextRec = packed record
  154. public
  155. CodePointCount : Byte;
  156. WeightCount : Byte;
  157. public
  158. function GetCodePoints() : PUInt24;inline;
  159. function GetWeights() : PUCA_PropWeights;inline;
  160. end;
  161. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  162. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  163. TUCA_PropItemContextTreeNodeRec = packed record
  164. public
  165. Left : Word;
  166. Right : Word;
  167. Data : TUCA_PropItemContextRec;
  168. public
  169. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  170. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  171. end;
  172. { TUCA_PropItemContextTreeRec }
  173. TUCA_PropItemContextTreeRec = packed record
  174. public
  175. Size : UInt24;
  176. public
  177. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  178. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  179. function Find(
  180. const AChars : PUInt24;
  181. const ACharCount : Integer;
  182. out ANode : PUCA_PropItemContextTreeNodeRec
  183. ) : Boolean;
  184. end;
  185. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  186. { TUCA_PropItemRec }
  187. TUCA_PropItemRec = packed record
  188. private
  189. const FLAG_CODEPOINT = 1;
  190. const FLAG_CONTEXTUAL = 2;
  191. const FLAG_DELETION = 3;
  192. private
  193. function GetWeightLength: TWeightLength;inline;
  194. //procedure SetWeightLength(AValue: TWeightLength);inline;
  195. function GetCodePoint() : UInt24;inline;
  196. public
  197. Valid : Byte;// On First Bit
  198. ChildCount : Byte;
  199. Size : Word;
  200. Flags : Byte;
  201. public
  202. function HasCodePoint() : Boolean;inline;
  203. property CodePoint : UInt24 read GetCodePoint;
  204. //WeightLength is stored in the 5 last bits of "Valid"
  205. property WeightLength : TWeightLength read GetWeightLength;// write SetWeightLength;
  206. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  207. function IsValid() : Boolean;inline;
  208. //function GetWeightArray() : PUCA_PropWeights;inline;
  209. procedure GetWeightArray(ADest : PUCA_PropWeights);
  210. function GetSelfOnlySize() : Word;inline;
  211. function GetContextual() : Boolean;inline;
  212. property Contextual : Boolean read GetContextual;
  213. function GetContext() : PUCA_PropItemContextTreeRec;
  214. function IsDeleted() : Boolean;inline;
  215. end;
  216. PUCA_PropItemRec = ^TUCA_PropItemRec;
  217. TUCA_VariableKind = (
  218. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  219. ucaIgnoreSP // This one is not implemented !
  220. );
  221. TCollationName = string[128];
  222. PUCA_DataBook = ^TUCA_DataBook;
  223. TUCA_DataBook = packed record
  224. public
  225. Base : PUCA_DataBook;
  226. Version : TCollationName;
  227. CollationName : TCollationName;
  228. VariableWeight : TUCA_VariableKind;
  229. Backwards : array[0..3] of Boolean;
  230. BMP_Table1 : PByte;
  231. BMP_Table2 : PUInt24;
  232. OBMP_Table1 : PWord;
  233. OBMP_Table2 : PUInt24;
  234. PropCount : Integer;
  235. Props : PUCA_PropItemRec;
  236. VariableLowLimit : Word;
  237. VariableHighLimit : Word;
  238. public
  239. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  240. end;
  241. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  242. TCollationFields = set of TCollationField;
  243. const
  244. ROOT_COLLATION_NAME = 'DUCET';
  245. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  246. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  247. function UnicodeIsSurrogatePair(
  248. const AHighSurrogate,
  249. ALowSurrogate : UnicodeChar
  250. ) : Boolean;inline;
  251. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  252. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  253. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  254. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  255. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  256. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
  257. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
  258. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  259. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  260. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  261. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  262. type
  263. TUCASortKeyItem = Word;
  264. TUCASortKey = array of TUCASortKeyItem;
  265. function ComputeSortKey(
  266. const AString : UnicodeString;
  267. const ACollation : PUCA_DataBook
  268. ) : TUCASortKey;inline;overload;
  269. function ComputeSortKey(
  270. const AStr : PUnicodeChar;
  271. const ALength : SizeInt;
  272. const ACollation : PUCA_DataBook
  273. ) : TUCASortKey;overload;
  274. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  275. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  276. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  277. function UnregisterCollation(const AName : ansistring): Boolean;
  278. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  279. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  280. function GetCollationCount() : Integer;
  281. procedure PrepareCollation(
  282. ACollation : PUCA_DataBook;
  283. const ABaseName : ansistring;
  284. const AChangedFields : TCollationFields
  285. );
  286. resourcestring
  287. SCollationNotFound = 'Collation not found : "%s".';
  288. implementation
  289. uses
  290. unicodenumtable;
  291. type
  292. TCardinalRec = packed record
  293. {$ifdef FPC_LITTLE_ENDIAN}
  294. byte0, byte1, byte2, byte3 : Byte;
  295. {$else FPC_LITTLE_ENDIAN}
  296. byte3, byte2, byte1, byte0 : Byte;
  297. {$endif FPC_LITTLE_ENDIAN}
  298. end;
  299. TWordRec = packed record
  300. {$ifdef FPC_LITTLE_ENDIAN}
  301. byte0, byte1 : Byte;
  302. {$else FPC_LITTLE_ENDIAN}
  303. byte1, byte0 : Byte;
  304. {$endif FPC_LITTLE_ENDIAN}
  305. end;
  306. { TUInt24Rec }
  307. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  308. begin
  309. TCardinalRec(Result).byte0 := a.byte0;
  310. TCardinalRec(Result).byte1 := a.byte1;
  311. TCardinalRec(Result).byte2 := a.byte2;
  312. TCardinalRec(Result).byte3 := 0;
  313. end;
  314. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  315. begin
  316. Result := Cardinal(a);
  317. end;
  318. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  319. begin
  320. {$IFOPT R+}
  321. if (a > $FFFF) then
  322. Error(reIntOverflow);
  323. {$ENDIF R+}
  324. TWordRec(Result).byte0 := a.byte0;
  325. TWordRec(Result).byte1 := a.byte1;
  326. end;
  327. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  328. begin
  329. {$IFOPT R+}
  330. if (a > $FF) then
  331. Error(reIntOverflow);
  332. {$ENDIF R+}
  333. Result := a.byte0;
  334. end;
  335. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  336. begin
  337. {$IFOPT R+}
  338. if (a > $FFFFFF) then
  339. Error(reIntOverflow);
  340. {$ENDIF R+}
  341. Result.byte0 := TCardinalRec(a).byte0;
  342. Result.byte1 := TCardinalRec(a).byte1;
  343. Result.byte2 := TCardinalRec(a).byte2;
  344. end;
  345. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  346. begin
  347. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  348. end;
  349. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  350. begin
  351. Result := (TCardinalRec(b).byte3 = 0) and
  352. (a.byte0 = TCardinalRec(b).byte0) and
  353. (a.byte1 = TCardinalRec(b).byte1) and
  354. (a.byte2 = TCardinalRec(b).byte2);
  355. end;
  356. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  357. begin
  358. Result := (b = a);
  359. end;
  360. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  361. begin
  362. Result := (LongInt(a) = b);
  363. end;
  364. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  365. begin
  366. Result := (b = a);
  367. end;
  368. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  369. begin
  370. Result := (a.byte2 = 0) and
  371. (a.byte0 = TWordRec(b).byte0) and
  372. (a.byte1 = TWordRec(b).byte1);
  373. end;
  374. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  375. begin
  376. Result := (b = a);
  377. end;
  378. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  379. begin
  380. Result := (a.byte2 = 0) and
  381. (a.byte1 = 0) and
  382. (a.byte0 = b);
  383. end;
  384. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  385. begin
  386. Result := (b = a);
  387. end;
  388. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  389. begin
  390. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  391. end;
  392. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  393. begin
  394. Result := (TCardinalRec(b).byte3 <> 0) or
  395. (a.byte0 <> TCardinalRec(b).byte0) or
  396. (a.byte1 <> TCardinalRec(b).byte1) or
  397. (a.byte2 <> TCardinalRec(b).byte2);
  398. end;
  399. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  400. begin
  401. Result := (b <> a);
  402. end;
  403. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  404. begin
  405. Result := (a.byte2 > b.byte2) or
  406. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  407. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  408. end;
  409. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  410. begin
  411. Result := Cardinal(a) > b;
  412. end;
  413. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  414. begin
  415. Result := a > Cardinal(b);
  416. end;
  417. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  418. begin
  419. Result := (a.byte2 > b.byte2) or
  420. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  421. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  422. end;
  423. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  424. begin
  425. Result := Cardinal(a) >= b;
  426. end;
  427. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  428. begin
  429. Result := a >= Cardinal(b);
  430. end;
  431. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  432. begin
  433. Result := (b > a);
  434. end;
  435. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  436. begin
  437. Result := Cardinal(a) < b;
  438. end;
  439. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  440. begin
  441. Result := a < Cardinal(b);
  442. end;
  443. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  444. begin
  445. Result := (b >= a);
  446. end;
  447. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  448. begin
  449. Result := Cardinal(a) <= b;
  450. end;
  451. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  452. begin
  453. Result := a <= Cardinal(b);
  454. end;
  455. var
  456. CollationTable : array of PUCA_DataBook;
  457. function IndexOfCollation(const AName : string) : Integer;
  458. var
  459. i, c : Integer;
  460. p : Pointer;
  461. begin
  462. c := Length(AName);
  463. p := @AName[1];
  464. for i := 0 to Length(CollationTable) - 1 do begin
  465. if (Length(CollationTable[i]^.CollationName) = c) and
  466. CompareMem(@(CollationTable[i]^.CollationName[1]),p,c)
  467. then
  468. exit(i);
  469. end;
  470. Result := -1;
  471. end;
  472. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  473. var
  474. i : Integer;
  475. begin
  476. Result := (IndexOfCollation(ACollation^.CollationName) = -1);
  477. if Result then begin
  478. i := Length(CollationTable);
  479. SetLength(CollationTable,(i+1));
  480. CollationTable[i] := ACollation;
  481. end;
  482. end;
  483. function UnregisterCollation(const AName : ansistring): Boolean;
  484. var
  485. i, c : Integer;
  486. begin
  487. i := IndexOfCollation(AName);
  488. Result := (i >= 0);
  489. if Result then begin
  490. c := Length(CollationTable);
  491. if (c = 1) then begin
  492. SetLength(CollationTable,0);
  493. end else begin
  494. CollationTable[i] := CollationTable[c-1];
  495. SetLength(CollationTable,(c-1));
  496. end;
  497. end;
  498. end;
  499. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  500. var
  501. i : Integer;
  502. begin
  503. i := IndexOfCollation(AName);
  504. if (i = -1) then
  505. Result := nil
  506. else
  507. Result := CollationTable[i];
  508. end;
  509. function GetCollationCount() : Integer;
  510. begin
  511. Result := Length(CollationTable);
  512. end;
  513. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  514. begin
  515. if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
  516. Result := nil
  517. else
  518. Result := CollationTable[AIndex];
  519. end;
  520. procedure PrepareCollation(
  521. ACollation : PUCA_DataBook;
  522. const ABaseName : ansistring;
  523. const AChangedFields : TCollationFields
  524. );
  525. var
  526. s : ansistring;
  527. p, base : PUCA_DataBook;
  528. begin
  529. if (ABaseName <> '') then
  530. s := ABaseName
  531. else
  532. s := ROOT_COLLATION_NAME;
  533. p := ACollation;
  534. base := FindCollation(s);
  535. if (base = nil) then
  536. raise EUnicodeException.CreateFmt(SCollationNotFound,[s]);
  537. p^.Base := base;
  538. if not(TCollationField.BackWard in AChangedFields) then
  539. p^.Backwards := base^.Backwards;
  540. if not(TCollationField.VariableLowLimit in AChangedFields) then
  541. p^.VariableLowLimit := base^.VariableLowLimit;
  542. if not(TCollationField.VariableHighLimit in AChangedFields) then
  543. p^.VariableLowLimit := base^.VariableHighLimit;
  544. end;
  545. {$INCLUDE unicodedata.inc}
  546. {$IFDEF ENDIAN_LITTLE}
  547. {$INCLUDE unicodedata_le.inc}
  548. {$ENDIF ENDIAN_LITTLE}
  549. {$IFDEF ENDIAN_BIG}
  550. {$INCLUDE unicodedata_be.inc}
  551. {$ENDIF ENDIAN_BIG}
  552. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  553. begin
  554. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  555. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  556. end;
  557. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  558. begin
  559. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  560. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  561. end;
  562. function UnicodeIsSurrogatePair(
  563. const AHighSurrogate,
  564. ALowSurrogate : UnicodeChar
  565. ) : Boolean;
  566. begin
  567. Result :=
  568. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  569. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  570. ) and
  571. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  572. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  573. )
  574. end;
  575. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  576. begin
  577. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  578. (Word(AValue) <= HIGH_SURROGATE_END);
  579. end;
  580. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  581. begin
  582. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  583. (Word(AValue) <= LOW_SURROGATE_END);
  584. end;
  585. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  586. begin
  587. Result:=
  588. @UC_PROP_ARRAY[
  589. UC_TABLE_3[
  590. UC_TABLE_2[UC_TABLE_1[WordRec(ACodePoint).Hi]]
  591. [WordRec(ACodePoint).Lo shr 4]
  592. ][WordRec(ACodePoint).Lo and $F]
  593. ]; {
  594. @UC_PROP_ARRAY[
  595. UC_TABLE_2[
  596. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  597. WordRec(ACodePoint).Lo
  598. ]
  599. ];}
  600. end;
  601. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  602. begin
  603. Result:=
  604. @UC_PROP_ARRAY[
  605. UCO_TABLE_3[
  606. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  607. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  608. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  609. ]; {
  610. Result:=
  611. @UC_PROP_ARRAY[
  612. UCO_TABLE_2[
  613. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  614. Word(ALowS) - LOW_SURROGATE_BEGIN
  615. ]
  616. ]; }
  617. end;
  618. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  619. var
  620. l, h : UnicodeChar;
  621. begin
  622. if (ACodePoint <= High(Word)) then
  623. exit(GetProps(Word(ACodePoint)));
  624. FromUCS4(ACodePoint,h,l);
  625. Result := GetProps(h,l);
  626. end;
  627. //----------------------------------------------------------------------
  628. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  629. const
  630. SBase = $AC00;
  631. LBase = $1100;
  632. VBase = $1161;
  633. TBase = $11A7;
  634. LCount = 19;
  635. VCount = 21;
  636. TCount = 28;
  637. NCount = VCount * TCount; // 588
  638. SCount = LCount * NCount; // 11172
  639. var
  640. SIndex, L, V, T : Integer;
  641. begin
  642. SIndex := AChar - SBase;
  643. if (SIndex < 0) or (SIndex >= SCount) then begin
  644. ABuffer^ := AChar;
  645. exit(1);
  646. end;
  647. L := LBase + SIndex div NCount;
  648. V := VBase + (SIndex mod NCount) div TCount;
  649. T := TBase + SIndex mod TCount;
  650. ABuffer[0] := L;
  651. ABuffer[1] := V;
  652. Result := 2;
  653. if (T <> TBase) then begin
  654. ABuffer[2] := T;
  655. Inc(Result);
  656. end;
  657. end;
  658. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  659. var
  660. locStack : array[0..23] of Cardinal;
  661. locStackIdx : Integer;
  662. ResultBuffer : array[0..23] of Cardinal;
  663. ResultIdx : Integer;
  664. procedure AddCompositionToStack(const AIndex : Integer);
  665. var
  666. pdecIdx : ^TDecompositionIndexRec;
  667. k, kc : Integer;
  668. pu : ^UInt24;
  669. begin
  670. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  671. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  672. kc := pdecIdx^.Length;
  673. Inc(pu,kc);
  674. for k := 1 to kc do begin
  675. Dec(pu);
  676. locStack[locStackIdx + k] := pu^;
  677. end;
  678. locStackIdx := locStackIdx + kc;
  679. end;
  680. procedure AddResult(const AChar : Cardinal);inline;
  681. begin
  682. Inc(ResultIdx);
  683. ResultBuffer[ResultIdx] := AChar;
  684. end;
  685. function PopStack() : Cardinal;inline;
  686. begin
  687. Result := locStack[locStackIdx];
  688. Dec(locStackIdx);
  689. end;
  690. var
  691. cu : Cardinal;
  692. decIdx : SmallInt;
  693. locIsWord : Boolean;
  694. i : Integer;
  695. p : PUnicodeChar;
  696. begin
  697. ResultIdx := -1;
  698. locStackIdx := -1;
  699. AddCompositionToStack(ADecomposeIndex);
  700. while (locStackIdx >= 0) do begin
  701. cu := PopStack();
  702. locIsWord := (cu <= MAX_WORD);
  703. if locIsWord then
  704. decIdx := GetProps(Word(cu))^.DecompositionID
  705. else
  706. decIdx := GetProps(cu)^.DecompositionID;
  707. if (decIdx = -1) then
  708. AddResult(cu)
  709. else
  710. AddCompositionToStack(decIdx);
  711. end;
  712. p := ABuffer;
  713. Result := 0;
  714. for i := 0 to ResultIdx do begin
  715. cu := ResultBuffer[i];
  716. if (cu <= MAX_WORD) then begin
  717. p[0] := UnicodeChar(Word(cu));
  718. Inc(p);
  719. end else begin
  720. FromUCS4(cu,p[0],p[1]);
  721. Inc(p,2);
  722. Inc(Result);
  723. end;
  724. end;
  725. Result := Result + ResultIdx + 1;
  726. end;
  727. procedure CanonicalOrder(var AString : UnicodeString);
  728. begin
  729. CanonicalOrder(@AString[1],Length(AString));
  730. end;
  731. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  732. var
  733. i, c : SizeInt;
  734. p, q : PUnicodeChar;
  735. locIsSurrogateP, locIsSurrogateQ : Boolean;
  736. procedure Swap();
  737. var
  738. t, t1 : UnicodeChar;
  739. begin
  740. if not locIsSurrogateP then begin
  741. if not locIsSurrogateQ then begin
  742. t := p^;
  743. p^ := q^;
  744. q^ := t;
  745. exit;
  746. end;
  747. t := p^;
  748. p[0] := q[0];
  749. p[1] := q[1];
  750. q[1] := t;
  751. exit;
  752. end;
  753. if not locIsSurrogateQ then begin
  754. t := q[0];
  755. p[2] := p[1];
  756. p[1] := p[0];
  757. p[0] := t;
  758. exit;
  759. end;
  760. t := p[0];
  761. t1 := p[1];
  762. p[0] := q[0];
  763. p[1] := q[1];
  764. q[0] := t;
  765. q[1] := t1;
  766. end;
  767. var
  768. pu : PUC_Prop;
  769. cccp, cccq : Byte;
  770. begin
  771. c := ALength;
  772. if (c < 2) then
  773. exit;
  774. p := AStr;
  775. i := 1;
  776. while (i < c) do begin
  777. pu := GetProps(Word(p^));
  778. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  779. if locIsSurrogateP then begin
  780. if (i = (c - 1)) then
  781. Break;
  782. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  783. Inc(p);
  784. Inc(i);
  785. Continue;
  786. end;
  787. pu := GetProps(p[0],p[1]);
  788. end;
  789. if (pu^.CCC > 0) then begin
  790. cccp := pu^.CCC;
  791. if locIsSurrogateP then
  792. q := p + 2
  793. else
  794. q := p + 1;
  795. pu := GetProps(Word(q^));
  796. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  797. if locIsSurrogateQ then begin
  798. if (i = c) then
  799. Break;
  800. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  801. Inc(p);
  802. Inc(i);
  803. Continue;
  804. end;
  805. pu := GetProps(q[0],q[1]);
  806. end;
  807. cccq := pu^.CCC;
  808. if (cccq > 0) and (cccp > cccq) then begin
  809. Swap();
  810. if (i > 1) then begin
  811. Dec(p);
  812. Dec(i);
  813. pu := GetProps(Word(p^));
  814. if (pu^.Category = UGC_Surrogate) then begin
  815. if (i > 1) then begin
  816. Dec(p);
  817. Dec(i);
  818. end;
  819. end;
  820. Continue;
  821. end;
  822. end;
  823. end;
  824. if locIsSurrogateP then begin
  825. Inc(p);
  826. Inc(i);
  827. end;
  828. Inc(p);
  829. Inc(i);
  830. end;
  831. end;
  832. //Canonical Decomposition
  833. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  834. begin
  835. Result := NormalizeNFD(@AString[1],Length(AString));
  836. end;
  837. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  838. const MAX_EXPAND = 3;
  839. var
  840. i, c, kc, k : SizeInt;
  841. pp, pr : PUnicodeChar;
  842. pu : PUC_Prop;
  843. locIsSurrogate : Boolean;
  844. cpArray : array[0..7] of Cardinal;
  845. cp : Cardinal;
  846. begin
  847. c := ALength;
  848. SetLength(Result,(MAX_EXPAND*c));
  849. if (c > 0) then begin
  850. pp := AStr;
  851. pr := @Result[1];
  852. i := 1;
  853. while (i <= c) do begin
  854. pu := GetProps(Word(pp^));
  855. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  856. if locIsSurrogate then begin
  857. if (i = c) then
  858. Break;
  859. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  860. pr^ := pp^;
  861. Inc(pp);
  862. Inc(pr);
  863. Inc(i);
  864. Continue;
  865. end;
  866. pu := GetProps(pp[0],pp[1]);
  867. end;
  868. if pu^.HangulSyllable then begin
  869. if locIsSurrogate then begin
  870. cp := ToUCS4(pp[0],pp[1]);
  871. Inc(pp);
  872. Inc(i);
  873. end else begin
  874. cp := Word(pp^);
  875. end;
  876. kc := DecomposeHangul(cp,@cpArray[0]);
  877. for k := 0 to kc - 1 do begin
  878. if (cpArray[k] <= MAX_WORD) then begin
  879. pr^ := UnicodeChar(Word(cpArray[k]));
  880. pr := pr + 1;
  881. end else begin
  882. FromUCS4(cpArray[k],pr[0],pr[1]);
  883. pr := pr + 2;
  884. end;
  885. end;
  886. if (kc > 0) then
  887. Dec(pr);
  888. end else begin
  889. if (pu^.DecompositionID = -1) then begin
  890. pr^ := pp^;
  891. if locIsSurrogate then begin
  892. Inc(pp);
  893. Inc(pr);
  894. Inc(i);
  895. pr^ := pp^;
  896. end;
  897. end else begin
  898. k := Decompose(pu^.DecompositionID,pr);
  899. pr := pr + (k - 1);
  900. if locIsSurrogate then begin
  901. Inc(pp);
  902. Inc(i);
  903. end;
  904. end;
  905. end;
  906. Inc(pp);
  907. Inc(pr);
  908. Inc(i);
  909. end;
  910. Dec(pp);
  911. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  912. SetLength(Result,i);
  913. CanonicalOrder(@Result[1],Length(Result));
  914. end;
  915. end;
  916. type
  917. TBitOrder = 0..7;
  918. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  919. begin
  920. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  921. end;
  922. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  923. begin
  924. if AValue then
  925. AData := AData or (1 shl (ABit mod 8))
  926. else
  927. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  928. end;
  929. { TUCA_PropItemContextTreeNodeRec }
  930. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  931. begin
  932. if (Self.Left = 0) then
  933. Result := nil
  934. else
  935. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  936. end;
  937. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  938. begin
  939. if (Self.Right = 0) then
  940. Result := nil
  941. else
  942. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  943. end;
  944. { TUCA_PropItemContextRec }
  945. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  946. begin
  947. Result := PUInt24(
  948. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  949. SizeOf(Self.WeightCount)
  950. );
  951. end;
  952. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  953. begin
  954. Result := PUCA_PropWeights(
  955. PtrUInt(@Self) +
  956. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  957. (Self.CodePointCount*SizeOf(UInt24))
  958. );
  959. end;
  960. { TUCA_PropItemContextTreeRec }
  961. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  962. begin
  963. if (Size = 0) then
  964. Result := nil
  965. else
  966. Result := PUCA_PropItemContextTreeNodeRec(
  967. PtrUInt(
  968. PtrUInt(@Self) + SizeOf(UInt24){Size}
  969. )
  970. );
  971. end;
  972. function CompareCodePoints(
  973. A : PUInt24; LA : Integer;
  974. B : PUInt24; LB : Integer
  975. ) : Integer;
  976. var
  977. i, hb : Integer;
  978. begin
  979. if (A = B) then
  980. exit(0);
  981. Result := 1;
  982. hb := LB - 1;
  983. for i := 0 to LA - 1 do begin
  984. if (i > hb) then
  985. exit;
  986. if (A[i] < B[i]) then
  987. exit(-1);
  988. if (A[i] > B[i]) then
  989. exit(1);
  990. end;
  991. if (LA = LB) then
  992. exit(0);
  993. exit(-1);
  994. end;
  995. function TUCA_PropItemContextTreeRec.Find(
  996. const AChars : PUInt24;
  997. const ACharCount : Integer;
  998. out ANode : PUCA_PropItemContextTreeNodeRec
  999. ) : Boolean;
  1000. var
  1001. t : PUCA_PropItemContextTreeNodeRec;
  1002. begin
  1003. t := Data;
  1004. while (t <> nil) do begin
  1005. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  1006. 0 : Break;
  1007. -1 : t := t^.GetLeftNode();
  1008. else
  1009. t := t^.GetRightNode();
  1010. end;
  1011. end;
  1012. Result := (t <> nil);
  1013. if Result then
  1014. ANode := t;
  1015. end;
  1016. { TUC_Prop }
  1017. function TUC_Prop.GetCategory: Byte;
  1018. begin
  1019. Result := Byte((CategoryData and Byte($F8)) shr 3);
  1020. end;
  1021. function TUC_Prop.GetNumericValue: Double;
  1022. begin
  1023. Result := UC_NUMERIC_ARRAY[NumericIndex];
  1024. end;
  1025. procedure TUC_Prop.SetCategory(AValue: Byte);
  1026. begin
  1027. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  1028. end;
  1029. function TUC_Prop.GetWhiteSpace: Boolean;
  1030. begin
  1031. Result := IsBitON(CategoryData,0);
  1032. end;
  1033. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  1034. begin
  1035. SetBit(CategoryData,0,AValue);
  1036. end;
  1037. function TUC_Prop.GetHangulSyllable: Boolean;
  1038. begin
  1039. Result := IsBitON(CategoryData,1);
  1040. end;
  1041. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  1042. begin
  1043. SetBit(CategoryData,1,AValue);
  1044. end;
  1045. { TUCA_DataBook }
  1046. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  1047. begin
  1048. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  1049. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  1050. end;
  1051. { TUCA_PropItemRec }
  1052. function TUCA_PropItemRec.GetWeightLength: TWeightLength;
  1053. begin
  1054. Result := TWeightLength(Valid and Byte($F8) shr 3);
  1055. end;
  1056. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  1057. begin
  1058. if HasCodePoint() then begin
  1059. if Contextual then
  1060. Result := PUInt24(
  1061. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  1062. Word(GetContext()^.Size)
  1063. )^
  1064. else
  1065. Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
  1066. end else begin
  1067. {$ifdef uni_debug}
  1068. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  1069. {$else uni_debug}
  1070. Result := ZERO_UINT24;
  1071. {$endif uni_debug}
  1072. end
  1073. end;
  1074. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  1075. begin
  1076. Result := IsBitON(Flags,FLAG_CODEPOINT);
  1077. end;
  1078. {procedure TUCA_PropItemRec.SetWeightLength(AValue: TWeightLength);
  1079. begin
  1080. Valid := Valid or Byte(Byte(AValue) shl 3);
  1081. end;}
  1082. function TUCA_PropItemRec.IsValid() : Boolean;
  1083. begin
  1084. Result := IsBitON(Valid,BIT_POS_VALIDE);
  1085. end;
  1086. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  1087. begin
  1088. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1089. end;}
  1090. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  1091. var
  1092. i, c : Integer;
  1093. p : PByte;
  1094. pd : PUCA_PropWeights;
  1095. begin
  1096. c := WeightLength;
  1097. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1098. pd := ADest;
  1099. pd^.Weights[0] := PWord(p)^;
  1100. p := p + 2;
  1101. if IsBitON(Self.Valid,(BIT_POS_VALIDE+1)) then begin
  1102. pd^.Weights[1] := PWord(p)^;
  1103. p := p + 2;
  1104. end else begin
  1105. pd^.Weights[1] := p^;
  1106. p := p + 1;
  1107. end;
  1108. if IsBitON(Self.Valid,(BIT_POS_VALIDE+2)) then begin
  1109. pd^.Weights[2] := PWord(p)^;
  1110. p := p + 2;
  1111. end else begin
  1112. pd^.Weights[2] := p^;
  1113. p := p + 1;
  1114. end;
  1115. if (c > 1) then
  1116. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  1117. end;
  1118. function TUCA_PropItemRec.GetSelfOnlySize() : Word;
  1119. begin
  1120. Result := SizeOf(TUCA_PropItemRec);
  1121. if (WeightLength > 0) then begin
  1122. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  1123. if not IsBitON(Self.Valid,(BIT_POS_VALIDE+1)) then
  1124. Result := Result - 1;
  1125. if not IsBitON(Self.Valid,(BIT_POS_VALIDE+2)) then
  1126. Result := Result - 1;
  1127. end;
  1128. if HasCodePoint() then
  1129. Result := Result + SizeOf(UInt24);
  1130. if Contextual then
  1131. Result := Result + Word(GetContext()^.Size);
  1132. end;
  1133. function TUCA_PropItemRec.GetContextual: Boolean;
  1134. begin
  1135. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  1136. end;
  1137. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  1138. var
  1139. p : PtrUInt;
  1140. begin
  1141. if not Contextual then
  1142. exit(nil);
  1143. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  1144. if IsBitON(Flags,FLAG_CODEPOINT) then
  1145. p := p + SizeOf(UInt24);
  1146. Result := PUCA_PropItemContextTreeRec(p);
  1147. end;
  1148. function TUCA_PropItemRec.IsDeleted() : Boolean;
  1149. begin
  1150. Result := IsBitON(Flags,FLAG_DELETION);
  1151. end;
  1152. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  1153. var
  1154. i : Cardinal;
  1155. begin
  1156. if (ABook^.BMP_Table2 = nil) then
  1157. exit(nil);
  1158. i := ABook^.BMP_Table2[
  1159. (ABook^.BMP_Table1[WordRec(AChar).Hi] * 256) +
  1160. WordRec(AChar).Lo
  1161. ];
  1162. if (i > 0) then
  1163. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1164. else
  1165. Result := nil;
  1166. end;
  1167. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  1168. var
  1169. i : Cardinal;
  1170. begin
  1171. if (ABook^.OBMP_Table2 = nil) then
  1172. exit(nil);
  1173. i := ABook^.OBMP_Table2[
  1174. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1175. Word(ALowS) - LOW_SURROGATE_BEGIN
  1176. ];
  1177. if (i > 0) then
  1178. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1179. else
  1180. Result := nil;
  1181. end;
  1182. {$include weight_derivation.inc}
  1183. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  1184. var
  1185. bb : TUCASortKey;
  1186. begin
  1187. SetLength(bb,Length(B));
  1188. if (Length(bb) > 0) then
  1189. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  1190. Result := CompareSortKey(A,bb);
  1191. end;
  1192. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  1193. var
  1194. i, hb : Integer;
  1195. begin
  1196. if (Pointer(A) = Pointer(B)) then
  1197. exit(0);
  1198. Result := 1;
  1199. hb := Length(B) - 1;
  1200. for i := 0 to Length(A) - 1 do begin
  1201. if (i > hb) then
  1202. exit;
  1203. if (A[i] < B[i]) then
  1204. exit(-1);
  1205. if (A[i] > B[i]) then
  1206. exit(1);
  1207. end;
  1208. if (Length(A) = Length(B)) then
  1209. exit(0);
  1210. exit(-1);
  1211. end;
  1212. type
  1213. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  1214. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1215. var
  1216. r : TUCASortKey;
  1217. i, c, k, ral, levelCount : Integer;
  1218. pce : PUCA_PropWeights;
  1219. begin
  1220. c := Length(ACEList);
  1221. if (c = 0) then
  1222. exit(nil);
  1223. levelCount := Length(ACEList[0].Weights);
  1224. SetLength(r,(levelCount*c + levelCount));
  1225. ral := 0;
  1226. for i := 0 to levelCount - 1 do begin
  1227. if not ACollation^.Backwards[i] then begin
  1228. pce := @ACEList[0];
  1229. for k := 0 to c - 1 do begin
  1230. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1231. r[ral] := pce^.Weights[i];
  1232. ral := ral + 1;
  1233. end;
  1234. pce := pce + 1;
  1235. end;
  1236. end else begin
  1237. pce := @ACEList[c-1];
  1238. for k := 0 to c - 1 do begin
  1239. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1240. r[ral] := pce^.Weights[i];
  1241. ral := ral + 1;
  1242. end;
  1243. pce := pce - 1;
  1244. end;
  1245. end;
  1246. r[ral] := 0;
  1247. ral := ral + 1;
  1248. end;
  1249. ral := ral - 1;
  1250. SetLength(r,ral);
  1251. Result := r;
  1252. end;
  1253. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1254. var
  1255. r : TUCASortKey;
  1256. i, c, k, ral, levelCount : Integer;
  1257. pce : PUCA_PropWeights;
  1258. begin
  1259. c := Length(ACEList);
  1260. if (c = 0) then
  1261. exit(nil);
  1262. levelCount := Length(ACEList[0].Weights);
  1263. SetLength(r,(levelCount*c + levelCount));
  1264. ral := 0;
  1265. for i := 0 to levelCount - 1 do begin
  1266. if not ACollation^.Backwards[i] then begin
  1267. pce := @ACEList[0];
  1268. for k := 0 to c - 1 do begin
  1269. if (pce^.Weights[i] <> 0) then begin
  1270. r[ral] := pce^.Weights[i];
  1271. ral := ral + 1;
  1272. end;
  1273. pce := pce + 1;
  1274. end;
  1275. end else begin
  1276. pce := @ACEList[c-1];
  1277. for k := 0 to c - 1 do begin
  1278. if (pce^.Weights[i] <> 0) then begin
  1279. r[ral] := pce^.Weights[i];
  1280. ral := ral + 1;
  1281. end;
  1282. pce := pce - 1;
  1283. end;
  1284. end;
  1285. r[ral] := 0;
  1286. ral := ral + 1;
  1287. end;
  1288. ral := ral - 1;
  1289. SetLength(r,ral);
  1290. Result := r;
  1291. end;
  1292. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1293. var
  1294. r : TUCASortKey;
  1295. i, c, k, ral, levelCount : Integer;
  1296. pce : PUCA_PropWeights;
  1297. variableState : Boolean;
  1298. begin
  1299. c := Length(ACEList);
  1300. if (c = 0) then
  1301. exit(nil);
  1302. levelCount := Length(ACEList[0].Weights);
  1303. SetLength(r,(levelCount*c + levelCount));
  1304. ral := 0;
  1305. for i := 0 to levelCount - 1 do begin
  1306. if not ACollation^.Backwards[i] then begin
  1307. variableState := False;
  1308. pce := @ACEList[0];
  1309. for k := 0 to c - 1 do begin
  1310. if not ACollation^.IsVariable(pce) then begin
  1311. if (pce^.Weights[0] <> 0) then
  1312. variableState := False;
  1313. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1314. r[ral] := pce^.Weights[i];
  1315. ral := ral + 1;
  1316. end;
  1317. end else begin
  1318. variableState := True;
  1319. end;
  1320. pce := pce + 1;
  1321. end;
  1322. end else begin
  1323. pce := @ACEList[c-1];
  1324. for k := 0 to c - 1 do begin
  1325. if not ACollation^.IsVariable(pce) then begin
  1326. if (pce^.Weights[0] <> 0) then
  1327. variableState := False;
  1328. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1329. r[ral] := pce^.Weights[i];
  1330. ral := ral + 1;
  1331. end;
  1332. end else begin
  1333. variableState := True;
  1334. end;
  1335. pce := pce - 1;
  1336. end;
  1337. end;
  1338. r[ral] := 0;
  1339. ral := ral + 1;
  1340. end;
  1341. ral := ral - 1;
  1342. SetLength(r,ral);
  1343. Result := r;
  1344. end;
  1345. function FormKeyShiftedTrimmed(
  1346. const ACEList : TUCA_PropWeightsArray;
  1347. const ACollation : PUCA_DataBook
  1348. ) : TUCASortKey;
  1349. var
  1350. i : Integer;
  1351. p : ^TUCASortKeyItem;
  1352. begin
  1353. Result := FormKeyShifted(ACEList,ACollation);
  1354. i := Length(Result) - 1;
  1355. if (i >= 0) then begin
  1356. p := @Result[i];
  1357. while (i >= 0) do begin
  1358. if (p^ <> $FFFF) then
  1359. Break;
  1360. Dec(i);
  1361. Dec(p);
  1362. end;
  1363. if ((i+1) < Length(Result)) then
  1364. SetLength(Result,(i+1));
  1365. end;
  1366. end;
  1367. function FindChild(
  1368. const ACodePoint : Cardinal;
  1369. const AParent : PUCA_PropItemRec
  1370. ) : PUCA_PropItemRec;inline;
  1371. var
  1372. k : Integer;
  1373. begin
  1374. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  1375. for k := 0 to AParent^.ChildCount - 1 do begin
  1376. if (ACodePoint = Result^.CodePoint) then
  1377. exit;
  1378. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  1379. end;
  1380. Result := nil;
  1381. end;
  1382. function ComputeSortKey(
  1383. const AString : UnicodeString;
  1384. const ACollation : PUCA_DataBook
  1385. ) : TUCASortKey;
  1386. begin
  1387. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  1388. end;
  1389. function ComputeRawSortKey(
  1390. const AStr : PUnicodeChar;
  1391. const ALength : SizeInt;
  1392. const ACollation : PUCA_DataBook
  1393. ) : TUCA_PropWeightsArray;
  1394. var
  1395. r : TUCA_PropWeightsArray;
  1396. ral {used length of "r"}: Integer;
  1397. rl {capacity of "r"} : Integer;
  1398. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1399. begin
  1400. if (rl < AMinGrow) then
  1401. rl := rl + AMinGrow
  1402. else
  1403. rl := 2 * rl;
  1404. SetLength(r,rl);
  1405. end;
  1406. var
  1407. i : Integer;
  1408. s : UnicodeString;
  1409. ps : PUnicodeChar;
  1410. cp : Cardinal;
  1411. cl : PUCA_DataBook;
  1412. pp : PUCA_PropItemRec;
  1413. ppLevel : Byte;
  1414. removedCharIndex : array of DWord;
  1415. removedCharIndexLength : DWord;
  1416. locHistory : array[0..24] of record
  1417. i : Integer;
  1418. cl : PUCA_DataBook;
  1419. pp : PUCA_PropItemRec;
  1420. ppLevel : Byte;
  1421. cp : Cardinal;
  1422. removedCharIndexLength : DWord;
  1423. end;
  1424. locHistoryTop : Integer;
  1425. suppressState : record
  1426. cl : PUCA_DataBook;
  1427. CharCount : Integer;
  1428. end;
  1429. LastKeyOwner : record
  1430. Length : Integer;
  1431. Chars : array[0..24] of UInt24;
  1432. end;
  1433. procedure SaveKeyOwner();
  1434. var
  1435. k : Integer;
  1436. kppLevel : Byte;
  1437. begin
  1438. k := 0;
  1439. kppLevel := High(Byte);
  1440. while (k <= locHistoryTop) do begin
  1441. if (kppLevel <> locHistory[k].ppLevel) then begin
  1442. LastKeyOwner.Chars[k] := locHistory[k].cp;
  1443. kppLevel := locHistory[k].ppLevel;
  1444. end;
  1445. k := k + 1;
  1446. end;
  1447. if (k = 0) or (kppLevel <> ppLevel) then begin
  1448. LastKeyOwner.Chars[k] := cp;
  1449. k := k + 1;
  1450. end;
  1451. LastKeyOwner.Length := k;
  1452. end;
  1453. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1454. begin
  1455. SaveKeyOwner();
  1456. if ((ral + AItem^.WeightLength) > rl) then
  1457. GrowKey(AItem^.WeightLength);
  1458. AItem^.GetWeightArray(@r[ral]);
  1459. ral := ral + AItem^.WeightLength;
  1460. end;
  1461. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  1462. begin
  1463. if ((ral + AItem^.WeightCount) > rl) then
  1464. GrowKey(AItem^.WeightCount);
  1465. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  1466. ral := ral + AItem^.WeightCount;
  1467. end;
  1468. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1469. begin
  1470. SaveKeyOwner();
  1471. if ((ral + 2) > rl) then
  1472. GrowKey();
  1473. DeriveWeight(ACodePoint,@r[ral]);
  1474. ral := ral + 2;
  1475. end;
  1476. procedure RecordDeletion();inline;
  1477. begin
  1478. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  1479. if (suppressState.cl = nil) or
  1480. (suppressState.CharCount > ppLevel)
  1481. then begin
  1482. suppressState.cl := cl;
  1483. suppressState.CharCount := ppLevel;
  1484. end;
  1485. end;
  1486. end;
  1487. procedure RecordStep();inline;
  1488. begin
  1489. Inc(locHistoryTop);
  1490. locHistory[locHistoryTop].i := i;
  1491. locHistory[locHistoryTop].cl := cl;
  1492. locHistory[locHistoryTop].pp := pp;
  1493. locHistory[locHistoryTop].ppLevel := ppLevel;
  1494. locHistory[locHistoryTop].cp := cp;
  1495. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1496. RecordDeletion();
  1497. end;
  1498. procedure ClearHistory();inline;
  1499. begin
  1500. locHistoryTop := -1;
  1501. end;
  1502. function HasHistory() : Boolean;inline;
  1503. begin
  1504. Result := (locHistoryTop >= 0);
  1505. end;
  1506. function GetHistoryLength() : Integer;inline;
  1507. begin
  1508. Result := (locHistoryTop + 1);
  1509. end;
  1510. procedure GoBack();inline;
  1511. begin
  1512. Assert(locHistoryTop >= 0);
  1513. i := locHistory[locHistoryTop].i;
  1514. cp := locHistory[locHistoryTop].cp;
  1515. cl := locHistory[locHistoryTop].cl;
  1516. pp := locHistory[locHistoryTop].pp;
  1517. ppLevel := locHistory[locHistoryTop].ppLevel;
  1518. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1519. ps := @s[i];
  1520. Dec(locHistoryTop);
  1521. end;
  1522. var
  1523. c : Integer;
  1524. lastUnblockedNonstarterCCC : Byte;
  1525. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1526. var
  1527. k : DWord;
  1528. pk : PUnicodeChar;
  1529. puk : PUC_Prop;
  1530. begin
  1531. k := AStartFrom;
  1532. if (k > c) then
  1533. exit(False);
  1534. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0) then
  1535. exit(False);
  1536. {if (k = (i+1)) or
  1537. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1538. then
  1539. lastUnblockedNonstarterCCC := 0;}
  1540. pk := @s[k];
  1541. if UnicodeIsHighSurrogate(pk^) then begin
  1542. if (k = c) then
  1543. exit(False);
  1544. if UnicodeIsLowSurrogate(pk[1]) then
  1545. puk := GetProps(pk[0],pk[1])
  1546. else
  1547. puk := GetProps(Word(pk^));
  1548. end else begin
  1549. puk := GetProps(Word(pk^));
  1550. end;
  1551. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  1552. exit(False);
  1553. lastUnblockedNonstarterCCC := puk^.CCC;
  1554. Result := True;
  1555. end;
  1556. procedure RemoveChar(APos : Integer);inline;
  1557. begin
  1558. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1559. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1560. removedCharIndex[removedCharIndexLength] := APos;
  1561. Inc(removedCharIndexLength);
  1562. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  1563. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1564. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1565. removedCharIndex[removedCharIndexLength] := APos+1;
  1566. Inc(removedCharIndexLength);
  1567. end;
  1568. end;
  1569. procedure Inc_I();inline;
  1570. begin
  1571. if (removedCharIndexLength = 0) then begin
  1572. Inc(i);
  1573. Inc(ps);
  1574. exit;
  1575. end;
  1576. while True do begin
  1577. Inc(i);
  1578. Inc(ps);
  1579. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  1580. Break;
  1581. end;
  1582. end;
  1583. var
  1584. surrogateState : Boolean;
  1585. function MoveToNextChar() : Boolean;inline;
  1586. begin
  1587. Result := True;
  1588. if UnicodeIsHighSurrogate(ps[0]) then begin
  1589. if (i = c) then
  1590. exit(False);
  1591. if UnicodeIsLowSurrogate(ps[1]) then begin
  1592. surrogateState := True;
  1593. cp := ToUCS4(ps[0],ps[1]);
  1594. end else begin
  1595. surrogateState := False;
  1596. cp := Word(ps[0]);
  1597. end;
  1598. end else begin
  1599. surrogateState := False;
  1600. cp := Word(ps[0]);
  1601. end;
  1602. end;
  1603. procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
  1604. begin
  1605. cl := nil;
  1606. pp := nil;
  1607. ppLevel := 0;
  1608. if AClearSuppressInfo then begin
  1609. suppressState.cl := nil;
  1610. suppressState.CharCount := 0;
  1611. end;
  1612. end;
  1613. function FindPropUCA() : Boolean;
  1614. var
  1615. candidateCL : PUCA_DataBook;
  1616. begin
  1617. pp := nil;
  1618. if (cl = nil) then
  1619. candidateCL := ACollation
  1620. else
  1621. candidateCL := cl;
  1622. if surrogateState then begin
  1623. while (candidateCL <> nil) do begin
  1624. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  1625. if (pp <> nil) then
  1626. break;
  1627. candidateCL := candidateCL^.Base;
  1628. end;
  1629. end else begin
  1630. while (candidateCL <> nil) do begin
  1631. pp := GetPropUCA(ps[0],candidateCL);
  1632. if (pp <> nil) then
  1633. break;
  1634. candidateCL := candidateCL^.Base;
  1635. end;
  1636. end;
  1637. cl := candidateCL;
  1638. Result := (pp <> nil);
  1639. end;
  1640. procedure AddWeightsAndClear();inline;
  1641. var
  1642. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1643. begin
  1644. if (pp^.GetWeightLength() > 0) then begin
  1645. AddWeights(pp);
  1646. end else
  1647. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1648. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1649. (ctxNode^.Data.WeightCount > 0)
  1650. then begin
  1651. AddContextWeights(@ctxNode^.Data);
  1652. end;
  1653. //AddWeights(pp);
  1654. ClearHistory();
  1655. ClearPP();
  1656. end;
  1657. procedure StartMatch();
  1658. procedure HandleLastChar();
  1659. var
  1660. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1661. begin
  1662. while True do begin
  1663. if pp^.IsValid() then begin
  1664. if (pp^.GetWeightLength() > 0) then
  1665. AddWeights(pp)
  1666. else
  1667. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1668. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1669. (ctxNode^.Data.WeightCount > 0)
  1670. then
  1671. AddContextWeights(@ctxNode^.Data)
  1672. else
  1673. AddComputedWeights(cp){handle deletion of code point};
  1674. break;
  1675. end;
  1676. if (cl^.Base = nil) then begin
  1677. AddComputedWeights(cp);
  1678. break;
  1679. end;
  1680. cl := cl^.Base;
  1681. if not FindPropUCA() then begin
  1682. AddComputedWeights(cp);
  1683. break;
  1684. end;
  1685. end;
  1686. end;
  1687. var
  1688. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  1689. begin
  1690. ppLevel := 0;
  1691. if not FindPropUCA() then begin
  1692. AddComputedWeights(cp);
  1693. ClearHistory();
  1694. ClearPP();
  1695. end else begin
  1696. if (i = c) then begin
  1697. HandleLastChar();
  1698. end else begin
  1699. if pp^.IsValid()then begin
  1700. if (pp^.ChildCount = 0) then begin
  1701. if (pp^.GetWeightLength() > 0) then
  1702. AddWeights(pp)
  1703. else
  1704. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1705. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  1706. (tmpCtxNode^.Data.WeightCount > 0)
  1707. then
  1708. AddContextWeights(@tmpCtxNode^.Data)
  1709. else
  1710. AddComputedWeights(cp){handle deletion of code point};
  1711. ClearPP();
  1712. ClearHistory();
  1713. end else begin
  1714. RecordStep();
  1715. end
  1716. end else begin
  1717. if (pp^.ChildCount = 0) then begin
  1718. AddComputedWeights(cp);
  1719. ClearPP();
  1720. ClearHistory();
  1721. end else begin
  1722. RecordStep();
  1723. end;
  1724. end ;
  1725. end;
  1726. end;
  1727. end;
  1728. function TryPermutation() : Boolean;
  1729. var
  1730. kk : Integer;
  1731. b : Boolean;
  1732. puk : PUC_Prop;
  1733. ppk : PUCA_PropItemRec;
  1734. begin
  1735. Result := False;
  1736. puk := GetProps(cp);
  1737. if (puk^.CCC = 0) then
  1738. exit;
  1739. lastUnblockedNonstarterCCC := puk^.CCC;
  1740. if surrogateState then
  1741. kk := i + 2
  1742. else
  1743. kk := i + 1;
  1744. while IsUnblockedNonstarter(kk) do begin
  1745. b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
  1746. if b then
  1747. ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
  1748. else
  1749. ppk := FindChild(Word(s[kk]),pp);
  1750. if (ppk <> nil) then begin
  1751. pp := ppk;
  1752. RemoveChar(kk);
  1753. Inc(ppLevel);
  1754. RecordStep();
  1755. Result := True;
  1756. if (pp^.ChildCount = 0 ) then
  1757. Break;
  1758. end;
  1759. if b then
  1760. Inc(kk);
  1761. Inc(kk);
  1762. end;
  1763. end;
  1764. procedure AdvanceCharPos();inline;
  1765. begin
  1766. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  1767. Inc(i);
  1768. Inc(ps);
  1769. end;
  1770. Inc_I();
  1771. end;
  1772. var
  1773. ok : Boolean;
  1774. pp1 : PUCA_PropItemRec;
  1775. cltemp : PUCA_DataBook;
  1776. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1777. begin
  1778. if (ALength = 0) then
  1779. exit(nil);
  1780. c := ALength;
  1781. s := NormalizeNFD(AStr,c);
  1782. c := Length(s);
  1783. rl := 3*c;
  1784. SetLength(r,rl);
  1785. ral := 0;
  1786. ps := @s[1];
  1787. ClearPP();
  1788. locHistoryTop := -1;
  1789. removedCharIndexLength := 0;
  1790. FillByte(suppressState,SizeOf(suppressState),0);
  1791. LastKeyOwner.Length := 0;
  1792. i := 1;
  1793. while (i <= c) and MoveToNextChar() do begin
  1794. if (pp = nil) then begin // Start Matching
  1795. StartMatch();
  1796. end else begin
  1797. pp1 := FindChild(cp,pp);
  1798. if (pp1 <> nil) then begin
  1799. Inc(ppLevel);
  1800. pp := pp1;
  1801. if (pp^.ChildCount = 0) or (i = c) then begin
  1802. ok := False;
  1803. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  1804. if (pp^.GetWeightLength() > 0) then begin
  1805. AddWeightsAndClear();
  1806. ok := True;
  1807. end else
  1808. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1809. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1810. (ctxNode^.Data.WeightCount > 0)
  1811. then begin
  1812. AddContextWeights(@ctxNode^.Data);
  1813. ClearHistory();
  1814. ClearPP();
  1815. ok := True;
  1816. end
  1817. end;
  1818. if not ok then begin
  1819. RecordDeletion();
  1820. ok := False;
  1821. while HasHistory() do begin
  1822. GoBack();
  1823. if pp^.IsValid() and
  1824. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  1825. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  1826. )
  1827. then begin
  1828. AddWeightsAndClear();
  1829. ok := True;
  1830. Break;
  1831. end;
  1832. end;
  1833. if not ok then begin
  1834. cltemp := cl^.Base;
  1835. if (cltemp <> nil) then begin
  1836. ClearPP(False);
  1837. cl := cltemp;
  1838. Continue;
  1839. end;
  1840. end;
  1841. if not ok then begin
  1842. AddComputedWeights(cp);
  1843. ClearHistory();
  1844. ClearPP();
  1845. end;
  1846. end;
  1847. end else begin
  1848. RecordStep();
  1849. end;
  1850. end else begin
  1851. // permutations !
  1852. ok := False;
  1853. if TryPermutation() and pp^.IsValid() then begin
  1854. if (suppressState.CharCount = 0) then begin
  1855. AddWeightsAndClear();
  1856. Continue;
  1857. end;
  1858. while True do begin
  1859. if pp^.IsValid() and
  1860. (pp^.GetWeightLength() > 0) and
  1861. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  1862. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  1863. )
  1864. then begin
  1865. AddWeightsAndClear();
  1866. ok := True;
  1867. break;
  1868. end;
  1869. if not HasHistory() then
  1870. break;
  1871. GoBack();
  1872. if (pp = nil) then
  1873. break;
  1874. end;
  1875. end;
  1876. if not ok then begin
  1877. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  1878. if (pp^.GetWeightLength() > 0) then begin
  1879. AddWeightsAndClear();
  1880. ok := True;
  1881. end else
  1882. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1883. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1884. (ctxNode^.Data.WeightCount > 0)
  1885. then begin
  1886. AddContextWeights(@ctxNode^.Data);
  1887. ClearHistory();
  1888. ClearPP();
  1889. ok := True;
  1890. end
  1891. end;
  1892. if ok then
  1893. Continue;
  1894. end;
  1895. if not ok then begin
  1896. if (cl^.Base <> nil) then begin
  1897. cltemp := cl^.Base;
  1898. while HasHistory() do
  1899. GoBack();
  1900. pp := nil;
  1901. ppLevel := 0;
  1902. cl := cltemp;
  1903. Continue;
  1904. end;
  1905. //walk back
  1906. ok := False;
  1907. while HasHistory() do begin
  1908. GoBack();
  1909. if pp^.IsValid() and
  1910. (pp^.GetWeightLength() > 0) and
  1911. ( (suppressState.CharCount = 0) or
  1912. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  1913. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  1914. )
  1915. )
  1916. then begin
  1917. AddWeightsAndClear();
  1918. ok := True;
  1919. Break;
  1920. end;
  1921. end;
  1922. if ok then begin
  1923. AdvanceCharPos();
  1924. Continue;
  1925. end;
  1926. if (pp <> nil) then begin
  1927. AddComputedWeights(cp);
  1928. ClearHistory();
  1929. ClearPP();
  1930. end;
  1931. end;
  1932. end;
  1933. end;
  1934. if surrogateState then begin
  1935. Inc(ps);
  1936. Inc(i);
  1937. end;
  1938. //
  1939. Inc_I();
  1940. end;
  1941. SetLength(r,ral);
  1942. Result := r;
  1943. end;
  1944. function ComputeSortKey(
  1945. const AStr : PUnicodeChar;
  1946. const ALength : SizeInt;
  1947. const ACollation : PUCA_DataBook
  1948. ) : TUCASortKey;
  1949. var
  1950. r : TUCA_PropWeightsArray;
  1951. begin
  1952. r := ComputeRawSortKey(AStr,ALength,ACollation);
  1953. case ACollation^.VariableWeight of
  1954. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(r,ACollation);
  1955. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(r,ACollation);
  1956. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(r,ACollation);
  1957. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(r,ACollation);
  1958. else
  1959. Result := FormKeyShifted(r,ACollation);
  1960. end;
  1961. end;
  1962. end.