unicodedata.pas 97 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. -------------------------------------------------------------------------------
  16. Overview of the Unicode Collation Algorithm(UCA) data layout :
  17. ============================================================
  18. The UCA data(see “TUCA_DataBook”) are organized into index data
  19. (see the “TUCA_DataBook” fields “BMP_Table1”, “BMP_Table2”,
  20. “OBMP_Table1” and “OBMP_Table2”) and actual properties data(see
  21. the “Props” field of “TUCA_DataBook”). The index is a 3 level
  22. tables designed to minimize the overhaul data size. The
  23. properties’ data contain the actual (used) UCA’s properties
  24. for the customized code points(or sequence of code points)
  25. data (see TUCA_PropItemRec).
  26. To get the properties’ record of a code point, one goes
  27. through the index data to get its offset into the “Props”
  28. serialized data, see the “GetPropUCA” procedure.
  29. The “TUCA_PropItemRec” record, that represents the actual
  30. properties, contains a fixed part and a variable part. The
  31. fixed part is directly expressed as fields of the record :
  32. “WeightLength”, “ChildCount”, “Size”, “Flags”. The
  33. variable part depends on some values of the fixed part; For
  34. example “WeightLength” specify the number of weight[1] item,
  35. it can be zero or not null; The “Flags” fields does contains
  36. some bit states to indicate for example if the record’s owner,
  37. that is the target code point, is present(it is not always
  38. necessary to store the code point as you are required to have
  39. this information in the first place in order to get the
  40. “TUCA_PropItemRec” record).
  41. The data, as it is organized now, is as follow for each code point :
  42. * the fixed part is serialized,
  43. * if there are weight item array, they are serialized
  44. (see the "WeigthLength")
  45. * the code point is serialized (if needed)
  46. * the context[2] array is serialized
  47. * The children[3] record are serialized.
  48. The “Size” represent the size of the whole record, including its
  49. children records(see [3]). The “GetSelfOnlySize” returns the size
  50. of the queried record, excluding the size of its children.
  51. Notes :
  52. [1] : A weight item is an array of 3 words. A code point/sequence of code
  53. point may have zero or multiple items.
  54. [2] : There are characters(mostly japanese ones) that do not have their
  55. own weighs; There inherit the weights of the preceding character
  56. in the string that you will be evaluating.
  57. [3] : Some unicode characters are expressed using more than one code point.
  58. In that case the properties records are serialized as a trie. The
  59. trie data structure is useful when many characters’ expression have
  60. the same starting code point(s).
  61. [4] TUCA_PropItemRec serialization :
  62. TUCA_PropItemRec :
  63. WeightLength, ChildCount, Size, Flags [weight item array]
  64. [Code Point] [Context data]
  65. [Child 0] [Child 1] .. [Child n]
  66. each [Child k] is a TUCA_PropItemRec.
  67. }
  68. unit unicodedata;
  69. {$mode delphi}
  70. {$H+}
  71. {$PACKENUM 1}
  72. {$SCOPEDENUMS ON}
  73. {$pointermath on}
  74. {$define USE_INLINE}
  75. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  76. { $define uni_debug}
  77. interface
  78. const
  79. MAX_WORD = High(Word);
  80. LOW_SURROGATE_BEGIN = Word($DC00);
  81. LOW_SURROGATE_END = Word($DFFF);
  82. HIGH_SURROGATE_BEGIN = Word($D800);
  83. HIGH_SURROGATE_END = Word($DBFF);
  84. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  85. UCS4_HALF_BASE = LongWord($10000);
  86. UCS4_HALF_MASK = Word($3FF);
  87. MAX_LEGAL_UTF32 = $10FFFF;
  88. const
  89. // Unicode General Category
  90. UGC_UppercaseLetter = 0;
  91. UGC_LowercaseLetter = 1;
  92. UGC_TitlecaseLetter = 2;
  93. UGC_ModifierLetter = 3;
  94. UGC_OtherLetter = 4;
  95. UGC_NonSpacingMark = 5;
  96. UGC_CombiningMark = 6;
  97. UGC_EnclosingMark = 7;
  98. UGC_DecimalNumber = 8;
  99. UGC_LetterNumber = 9;
  100. UGC_OtherNumber = 10;
  101. UGC_ConnectPunctuation = 11;
  102. UGC_DashPunctuation = 12;
  103. UGC_OpenPunctuation = 13;
  104. UGC_ClosePunctuation = 14;
  105. UGC_InitialPunctuation = 15;
  106. UGC_FinalPunctuation = 16;
  107. UGC_OtherPunctuation = 17;
  108. UGC_MathSymbol = 18;
  109. UGC_CurrencySymbol = 19;
  110. UGC_ModifierSymbol = 20;
  111. UGC_OtherSymbol = 21;
  112. UGC_SpaceSeparator = 22;
  113. UGC_LineSeparator = 23;
  114. UGC_ParagraphSeparator = 24;
  115. UGC_Control = 25;
  116. UGC_Format = 26;
  117. UGC_Surrogate = 27;
  118. UGC_PrivateUse = 28;
  119. UGC_Unassigned = 29;
  120. type
  121. TUInt24Rec = packed record
  122. public
  123. {$ifdef FPC_LITTLE_ENDIAN}
  124. byte0, byte1, byte2 : Byte;
  125. {$else FPC_LITTLE_ENDIAN}
  126. byte2, byte1, byte0 : Byte;
  127. {$endif FPC_LITTLE_ENDIAN}
  128. public
  129. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  130. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  131. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  132. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  133. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  134. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  135. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  136. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  137. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  138. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  139. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  140. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  141. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  142. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  143. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  144. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  145. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  146. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  147. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  148. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  149. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  150. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  151. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  152. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  153. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  154. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  155. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  156. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  157. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  158. end;
  159. UInt24 = TUInt24Rec;
  160. PUInt24 = ^UInt24;
  161. const
  162. ZERO_UINT24 : UInt24 =
  163. {$ifdef FPC_LITTLE_ENDIAN}
  164. (byte0 : 0; byte1 : 0; byte2 : 0;);
  165. {$else FPC_LITTLE_ENDIAN}
  166. (byte2 : 0; byte1 : 0; byte0 : 0;);
  167. {$endif FPC_LITTLE_ENDIAN}
  168. type
  169. PUC_Prop = ^TUC_Prop;
  170. { TUC_Prop }
  171. { On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
  172. This breaks intended layout of initialized constants/variables.
  173. A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
  174. but for now just declare this record as "unpacked". This causes bloat, but it's better than having
  175. entire unit not working at all. }
  176. TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
  177. private
  178. function GetCategory : Byte;inline;
  179. procedure SetCategory(AValue : Byte);
  180. function GetWhiteSpace : Boolean;inline;
  181. procedure SetWhiteSpace(AValue : Boolean);
  182. function GetHangulSyllable : Boolean;inline;
  183. procedure SetHangulSyllable(AValue : Boolean);
  184. function GetNumericValue: Double;inline;
  185. public
  186. CategoryData : Byte;
  187. public
  188. CCC : Byte;
  189. NumericIndex : Byte;
  190. SimpleUpperCase : UInt24;
  191. SimpleLowerCase : UInt24;
  192. DecompositionID : SmallInt;
  193. public
  194. property Category : Byte read GetCategory write SetCategory;
  195. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  196. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  197. property NumericValue : Double read GetNumericValue;
  198. end;
  199. type
  200. TUCA_PropWeights = packed record
  201. Weights : array[0..2] of Word;
  202. end;
  203. PUCA_PropWeights = ^TUCA_PropWeights;
  204. TUCA_PropItemContextRec = packed record
  205. public
  206. CodePointCount : Byte;
  207. WeightCount : Byte;
  208. public
  209. function GetCodePoints() : PUInt24;inline;
  210. function GetWeights() : PUCA_PropWeights;inline;
  211. end;
  212. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  213. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  214. TUCA_PropItemContextTreeNodeRec = packed record
  215. public
  216. Left : Word;
  217. Right : Word;
  218. Data : TUCA_PropItemContextRec;
  219. public
  220. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  221. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  222. end;
  223. { TUCA_PropItemContextTreeRec }
  224. TUCA_PropItemContextTreeRec = packed record
  225. public
  226. Size : UInt24;
  227. public
  228. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  229. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  230. function Find(
  231. const AChars : PUInt24;
  232. const ACharCount : Integer;
  233. out ANode : PUCA_PropItemContextTreeNodeRec
  234. ) : Boolean;
  235. end;
  236. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  237. { TUCA_PropItemRec }
  238. TUCA_PropItemRec = packed record
  239. private
  240. const FLAG_VALID = 0;
  241. const FLAG_CODEPOINT = 1;
  242. const FLAG_CONTEXTUAL = 2;
  243. const FLAG_DELETION = 3;
  244. const FLAG_COMPRESS_WEIGHT_1 = 6;
  245. const FLAG_COMPRESS_WEIGHT_2 = 7;
  246. private
  247. function GetCodePoint() : UInt24;inline;
  248. public
  249. WeightLength : Byte;
  250. ChildCount : Byte;
  251. Size : Word;
  252. Flags : Byte;
  253. public
  254. function HasCodePoint() : Boolean;inline;
  255. property CodePoint : UInt24 read GetCodePoint;
  256. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  257. function IsValid() : Boolean;inline;
  258. //function GetWeightArray() : PUCA_PropWeights;inline;
  259. procedure GetWeightArray(ADest : PUCA_PropWeights);
  260. function GetSelfOnlySize() : Cardinal;inline;
  261. function GetContextual() : Boolean;inline;
  262. property Contextual : Boolean read GetContextual;
  263. function GetContext() : PUCA_PropItemContextTreeRec;
  264. function IsDeleted() : Boolean;inline;
  265. function IsWeightCompress_1() : Boolean;inline;
  266. function IsWeightCompress_2() : Boolean;inline;
  267. end;
  268. PUCA_PropItemRec = ^TUCA_PropItemRec;
  269. TUCA_VariableKind = (
  270. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  271. ucaIgnoreSP // This one is not implemented !
  272. );
  273. TCollationName = string[128];
  274. PUCA_DataBook = ^TUCA_DataBook;
  275. TUCA_DataBook = record
  276. public
  277. Base : PUCA_DataBook;
  278. Version : TCollationName;
  279. CollationName : TCollationName;
  280. VariableWeight : TUCA_VariableKind;
  281. Backwards : array[0..3] of Boolean;
  282. BMP_Table1 : PByte;
  283. BMP_Table2 : PUInt24;
  284. OBMP_Table1 : PWord;
  285. OBMP_Table2 : PUInt24;
  286. PropCount : Integer;
  287. Props : PUCA_PropItemRec;
  288. VariableLowLimit : Word;
  289. VariableHighLimit : Word;
  290. Dynamic : Boolean;
  291. public
  292. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  293. end;
  294. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  295. TCollationFields = set of TCollationField;
  296. const
  297. ROOT_COLLATION_NAME = 'DUCET';
  298. ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
  299. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  300. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  301. function UnicodeIsSurrogatePair(
  302. const AHighSurrogate,
  303. ALowSurrogate : UnicodeChar
  304. ) : Boolean;inline;
  305. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  306. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  307. function UnicodeToUpper(
  308. const AString : UnicodeString;
  309. const AIgnoreInvalidSequence : Boolean;
  310. out AResultString : UnicodeString
  311. ) : Integer;
  312. function UnicodeToLower(
  313. const AString : UnicodeString;
  314. const AIgnoreInvalidSequence : Boolean;
  315. out AResultString : UnicodeString
  316. ) : Integer;
  317. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  318. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  319. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  320. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
  321. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
  322. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  323. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  324. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  325. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  326. type
  327. TUCASortKeyItem = Word;
  328. TUCASortKey = array of TUCASortKeyItem;
  329. function ComputeSortKey(
  330. const AString : UnicodeString;
  331. const ACollation : PUCA_DataBook
  332. ) : TUCASortKey;inline;overload;
  333. function ComputeSortKey(
  334. const AStr : PUnicodeChar;
  335. const ALength : SizeInt;
  336. const ACollation : PUCA_DataBook
  337. ) : TUCASortKey;overload;
  338. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  339. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  340. function IncrementalCompareString(
  341. const AStrA : PUnicodeChar;
  342. const ALengthA : SizeInt;
  343. const AStrB : PUnicodeChar;
  344. const ALengthB : SizeInt;
  345. const ACollation : PUCA_DataBook
  346. ) : Integer;overload;
  347. function IncrementalCompareString(
  348. const AStrA,
  349. AStrB : UnicodeString;
  350. const ACollation : PUCA_DataBook
  351. ) : Integer;inline;overload;
  352. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
  353. function RegisterCollation(
  354. const ADirectory,
  355. ALanguage : string
  356. ) : Boolean;overload;
  357. function UnregisterCollation(const AName : ansistring): Boolean;
  358. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  359. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  360. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  361. function GetCollationCount() : Integer;
  362. procedure PrepareCollation(
  363. ACollation : PUCA_DataBook;
  364. const ABaseName : ansistring;
  365. const AChangedFields : TCollationFields
  366. );
  367. function LoadCollation(
  368. const AData : Pointer;
  369. const ADataLength : Integer
  370. ) : PUCA_DataBook;overload;
  371. function LoadCollation(const AFileName : string) : PUCA_DataBook;overload;
  372. function LoadCollation(
  373. const ADirectory,
  374. ALanguage : string
  375. ) : PUCA_DataBook;overload;
  376. procedure FreeCollation(AItem : PUCA_DataBook);
  377. type
  378. TEndianKind = (Little, Big);
  379. const
  380. ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
  381. {$IFDEF ENDIAN_LITTLE}
  382. ENDIAN_NATIVE = TEndianKind.Little;
  383. ENDIAN_NON_NATIVE = TEndianKind.Big;
  384. {$ENDIF ENDIAN_LITTLE}
  385. {$IFDEF ENDIAN_BIG}
  386. ENDIAN_NATIVE = TEndianKind.Big;
  387. ENDIAN_NON_NATIVE = TEndianKind.Little;
  388. {$ENDIF ENDIAN_BIG}
  389. resourcestring
  390. SCollationNotFound = 'Collation not found : "%s".';
  391. implementation
  392. uses
  393. unicodenumtable;
  394. type
  395. TCardinalRec = packed record
  396. {$ifdef FPC_LITTLE_ENDIAN}
  397. byte0, byte1, byte2, byte3 : Byte;
  398. {$else FPC_LITTLE_ENDIAN}
  399. byte3, byte2, byte1, byte0 : Byte;
  400. {$endif FPC_LITTLE_ENDIAN}
  401. end;
  402. TWordRec = packed record
  403. {$ifdef FPC_LITTLE_ENDIAN}
  404. byte0, byte1 : Byte;
  405. {$else FPC_LITTLE_ENDIAN}
  406. byte1, byte0 : Byte;
  407. {$endif FPC_LITTLE_ENDIAN}
  408. end;
  409. { TUInt24Rec }
  410. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  411. begin
  412. TCardinalRec(Result).byte0 := a.byte0;
  413. TCardinalRec(Result).byte1 := a.byte1;
  414. TCardinalRec(Result).byte2 := a.byte2;
  415. TCardinalRec(Result).byte3 := 0;
  416. end;
  417. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  418. begin
  419. Result := Cardinal(a);
  420. end;
  421. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  422. begin
  423. {$IFOPT R+}
  424. if (a > $FFFF) then
  425. Error(reIntOverflow);
  426. {$ENDIF R+}
  427. TWordRec(Result).byte0 := a.byte0;
  428. TWordRec(Result).byte1 := a.byte1;
  429. end;
  430. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  431. begin
  432. {$IFOPT R+}
  433. if (a > $FF) then
  434. Error(reIntOverflow);
  435. {$ENDIF R+}
  436. Result := a.byte0;
  437. end;
  438. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  439. begin
  440. {$IFOPT R+}
  441. if (a > $FFFFFF) then
  442. Error(reIntOverflow);
  443. {$ENDIF R+}
  444. Result.byte0 := TCardinalRec(a).byte0;
  445. Result.byte1 := TCardinalRec(a).byte1;
  446. Result.byte2 := TCardinalRec(a).byte2;
  447. end;
  448. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  449. begin
  450. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  451. end;
  452. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  453. begin
  454. Result := (TCardinalRec(b).byte3 = 0) and
  455. (a.byte0 = TCardinalRec(b).byte0) and
  456. (a.byte1 = TCardinalRec(b).byte1) and
  457. (a.byte2 = TCardinalRec(b).byte2);
  458. end;
  459. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  460. begin
  461. Result := (b = a);
  462. end;
  463. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  464. begin
  465. Result := (LongInt(a) = b);
  466. end;
  467. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  468. begin
  469. Result := (b = a);
  470. end;
  471. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  472. begin
  473. Result := (a.byte2 = 0) and
  474. (a.byte0 = TWordRec(b).byte0) and
  475. (a.byte1 = TWordRec(b).byte1);
  476. end;
  477. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  478. begin
  479. Result := (b = a);
  480. end;
  481. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  482. begin
  483. Result := (a.byte2 = 0) and
  484. (a.byte1 = 0) and
  485. (a.byte0 = b);
  486. end;
  487. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  488. begin
  489. Result := (b = a);
  490. end;
  491. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  492. begin
  493. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  494. end;
  495. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  496. begin
  497. Result := (TCardinalRec(b).byte3 <> 0) or
  498. (a.byte0 <> TCardinalRec(b).byte0) or
  499. (a.byte1 <> TCardinalRec(b).byte1) or
  500. (a.byte2 <> TCardinalRec(b).byte2);
  501. end;
  502. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  503. begin
  504. Result := (b <> a);
  505. end;
  506. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  507. begin
  508. Result := (a.byte2 > b.byte2) or
  509. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  510. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  511. end;
  512. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  513. begin
  514. Result := Cardinal(a) > b;
  515. end;
  516. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  517. begin
  518. Result := a > Cardinal(b);
  519. end;
  520. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  521. begin
  522. Result := (a.byte2 > b.byte2) or
  523. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  524. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  525. end;
  526. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  527. begin
  528. Result := Cardinal(a) >= b;
  529. end;
  530. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  531. begin
  532. Result := a >= Cardinal(b);
  533. end;
  534. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  535. begin
  536. Result := (b > a);
  537. end;
  538. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  539. begin
  540. Result := Cardinal(a) < b;
  541. end;
  542. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  543. begin
  544. Result := a < Cardinal(b);
  545. end;
  546. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  547. begin
  548. Result := (b >= a);
  549. end;
  550. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  551. begin
  552. Result := Cardinal(a) <= b;
  553. end;
  554. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  555. begin
  556. Result := a <= Cardinal(b);
  557. end;
  558. type
  559. TBitOrder = 0..7;
  560. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  561. begin
  562. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  563. end;
  564. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  565. begin
  566. if AValue then
  567. AData := AData or (1 shl (ABit mod 8))
  568. else
  569. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  570. end;
  571. var
  572. CollationTable : array of PUCA_DataBook;
  573. function IndexOfCollation(const AName : string) : Integer;
  574. var
  575. i, c : Integer;
  576. p : Pointer;
  577. begin
  578. c := Length(AName);
  579. p := @AName[1];
  580. for i := 0 to Length(CollationTable) - 1 do begin
  581. if (Length(CollationTable[i]^.CollationName) = c) and
  582. (CompareByte((CollationTable[i]^.CollationName[1]),p^,c)=0)
  583. then
  584. exit(i);
  585. end;
  586. Result := -1;
  587. end;
  588. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  589. var
  590. i : Integer;
  591. begin
  592. Result := (IndexOfCollation(ACollation^.CollationName) = -1);
  593. if Result then begin
  594. i := Length(CollationTable);
  595. SetLength(CollationTable,(i+1));
  596. CollationTable[i] := ACollation;
  597. end;
  598. end;
  599. function RegisterCollation(const ADirectory, ALanguage : string) : Boolean;
  600. var
  601. cl : PUCA_DataBook;
  602. begin
  603. cl := LoadCollation(ADirectory,ALanguage);
  604. if (cl = nil) then
  605. exit(False);
  606. try
  607. Result := RegisterCollation(cl);
  608. except
  609. FreeCollation(cl);
  610. raise;
  611. end;
  612. if not Result then
  613. FreeCollation(cl);
  614. end;
  615. function UnregisterCollation(const AName : ansistring): Boolean;
  616. var
  617. i, c : Integer;
  618. begin
  619. i := IndexOfCollation(AName);
  620. Result := (i >= 0);
  621. if Result then begin
  622. c := Length(CollationTable);
  623. if (c = 1) then begin
  624. SetLength(CollationTable,0);
  625. end else begin
  626. CollationTable[i] := CollationTable[c-1];
  627. SetLength(CollationTable,(c-1));
  628. end;
  629. end;
  630. end;
  631. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  632. var
  633. i : Integer;
  634. cl : PUCA_DataBook;
  635. begin
  636. if AFreeDynamicCollations then begin
  637. for i := Low(CollationTable) to High(CollationTable) do begin
  638. if CollationTable[i].Dynamic then begin
  639. cl := CollationTable[i];
  640. CollationTable[i] := nil;
  641. FreeCollation(cl);
  642. end;
  643. end;
  644. end;
  645. SetLength(CollationTable,0);
  646. end;
  647. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  648. var
  649. i : Integer;
  650. begin
  651. i := IndexOfCollation(AName);
  652. if (i = -1) then
  653. Result := nil
  654. else
  655. Result := CollationTable[i];
  656. end;
  657. function GetCollationCount() : Integer;
  658. begin
  659. Result := Length(CollationTable);
  660. end;
  661. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  662. begin
  663. if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
  664. Result := nil
  665. else
  666. Result := CollationTable[AIndex];
  667. end;
  668. procedure PrepareCollation(
  669. ACollation : PUCA_DataBook;
  670. const ABaseName : ansistring;
  671. const AChangedFields : TCollationFields
  672. );
  673. var
  674. s : ansistring;
  675. p, base : PUCA_DataBook;
  676. begin
  677. if (ABaseName <> '') then
  678. s := ABaseName
  679. else
  680. s := ROOT_COLLATION_NAME;
  681. p := ACollation;
  682. base := FindCollation(s);
  683. if (base = nil) then
  684. Error(reCodesetConversion);
  685. p^.Base := base;
  686. if not(TCollationField.BackWard in AChangedFields) then
  687. p^.Backwards := base^.Backwards;
  688. if not(TCollationField.VariableLowLimit in AChangedFields) then
  689. p^.VariableLowLimit := base^.VariableLowLimit;
  690. if not(TCollationField.VariableHighLimit in AChangedFields) then
  691. p^.VariableLowLimit := base^.VariableHighLimit;
  692. end;
  693. type
  694. TSerializedCollationHeader = packed record
  695. Base : TCollationName;
  696. Version : TCollationName;
  697. CollationName : TCollationName;
  698. VariableWeight : Byte;
  699. Backwards : Byte;
  700. BMP_Table1Length : DWord;
  701. BMP_Table2Length : DWord;
  702. OBMP_Table1Length : DWord;
  703. OBMP_Table2Length : DWord;
  704. PropCount : DWord;
  705. VariableLowLimit : Word;
  706. VariableHighLimit : Word;
  707. ChangedFields : Byte;
  708. end;
  709. PSerializedCollationHeader = ^TSerializedCollationHeader;
  710. procedure FreeCollation(AItem : PUCA_DataBook);
  711. var
  712. h : PSerializedCollationHeader;
  713. begin
  714. if (AItem = nil) or not(AItem^.Dynamic) then
  715. exit;
  716. h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
  717. if (AItem^.BMP_Table1 <> nil) then
  718. FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
  719. if (AItem^.BMP_Table2 <> nil) then
  720. FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
  721. if (AItem^.OBMP_Table1 <> nil) then
  722. FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
  723. if (AItem^.OBMP_Table2 <> nil) then
  724. FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
  725. if (AItem^.Props <> nil) then
  726. FreeMem(AItem^.Props,h^.PropCount);
  727. FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  728. end;
  729. function LoadCollation(
  730. const AData : Pointer;
  731. const ADataLength : Integer
  732. ) : PUCA_DataBook;
  733. var
  734. dataPointer : PByte;
  735. readedLength : LongInt;
  736. function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
  737. begin
  738. Result := (readedLength + ALength) <= ADataLength;
  739. if not result then
  740. exit;
  741. Move(dataPointer^,ADest^,ALength);
  742. Inc(dataPointer,ALength);
  743. readedLength := readedLength + ALength;
  744. end;
  745. var
  746. r : PUCA_DataBook;
  747. h : PSerializedCollationHeader;
  748. cfs : TCollationFields;
  749. i : Integer;
  750. baseName : TCollationName;
  751. begin
  752. readedLength := 0;
  753. dataPointer := AData;
  754. r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  755. try
  756. h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
  757. if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
  758. exit;
  759. r^.Version := h^.Version;
  760. r^.CollationName := h^.CollationName;
  761. r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
  762. r^.Backwards[0] := IsBitON(h^.Backwards,0);
  763. r^.Backwards[1] := IsBitON(h^.Backwards,1);
  764. r^.Backwards[2] := IsBitON(h^.Backwards,2);
  765. r^.Backwards[3] := IsBitON(h^.Backwards,3);
  766. if (h^.BMP_Table1Length > 0) then begin
  767. r^.BMP_Table1 := GetMem(h^.BMP_Table1Length);
  768. if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
  769. exit;
  770. end;
  771. if (h^.BMP_Table2Length > 0) then begin
  772. r^.BMP_Table2 := GetMem(h^.BMP_Table2Length);
  773. if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
  774. exit;
  775. end;
  776. if (h^.OBMP_Table1Length > 0) then begin
  777. r^.OBMP_Table1 := GetMem(h^.OBMP_Table1Length);
  778. if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
  779. exit;
  780. end;
  781. if (h^.OBMP_Table2Length > 0) then begin
  782. r^.OBMP_Table2 := GetMem(h^.OBMP_Table2Length);
  783. if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
  784. exit;
  785. end;
  786. r^.PropCount := h^.PropCount;
  787. if (h^.PropCount > 0) then begin
  788. r^.Props := GetMem(h^.PropCount);
  789. if not ReadBuffer(r^.Props,h^.PropCount) then
  790. exit;
  791. end;
  792. r^.VariableLowLimit := h^.VariableLowLimit;
  793. r^.VariableHighLimit := h^.VariableHighLimit;
  794. cfs := [];
  795. for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
  796. if IsBitON(h^.ChangedFields,i) then
  797. cfs := cfs + [TCollationField(i)];
  798. end;
  799. if (h^.Base <> '') then
  800. baseName := h^.Base
  801. else if (h^.CollationName <> ROOT_COLLATION_NAME) then
  802. baseName := ROOT_COLLATION_NAME
  803. else
  804. baseName := '';
  805. if (baseName <> '') then
  806. PrepareCollation(r,baseName,cfs);
  807. r^.Dynamic := True;
  808. Result := r;
  809. except
  810. FreeCollation(r);
  811. raise;
  812. end;
  813. end;
  814. {$PUSH}
  815. function LoadCollation(const AFileName : string) : PUCA_DataBook;
  816. const
  817. BLOCK_SIZE = 16*1024;
  818. var
  819. f : File of Byte;
  820. locSize, locReaded, c : LongInt;
  821. locBuffer : PByte;
  822. locBlockSize : LongInt;
  823. begin
  824. Result := nil;
  825. {$I-}
  826. if (AFileName = '') then
  827. exit;
  828. Assign(f,AFileName);
  829. Reset(f);
  830. try
  831. if (IOResult <> 0) then
  832. exit;
  833. locSize := FileSize(f);
  834. if (locSize < SizeOf(TSerializedCollationHeader)) then
  835. exit;
  836. locBuffer := GetMem(locSize);
  837. try
  838. locBlockSize := BLOCK_SIZE;
  839. locReaded := 0;
  840. while (locReaded < locSize) do begin
  841. if (locBlockSize > (locSize-locReaded)) then
  842. locBlockSize := locSize-locReaded;
  843. BlockRead(f,locBuffer[locReaded],locBlockSize,c);
  844. if (IOResult <> 0) or (c <= 0) then
  845. exit;
  846. locReaded := locReaded + c;
  847. end;
  848. Result := LoadCollation(locBuffer,locSize);
  849. finally
  850. FreeMem(locBuffer,locSize);
  851. end;
  852. finally
  853. Close(f);
  854. end;
  855. end;
  856. {$POP}
  857. function LoadCollation(const ADirectory, ALanguage : string) : PUCA_DataBook;
  858. var
  859. fileName : string;
  860. begin
  861. fileName := ADirectory;
  862. if (fileName <> '') then begin
  863. if (fileName[Length(fileName)] <> DirectorySeparator) then
  864. fileName := fileName + DirectorySeparator;
  865. end;
  866. fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
  867. Result := LoadCollation(fileName);
  868. end;
  869. {$INCLUDE unicodedata.inc}
  870. {$IFDEF ENDIAN_LITTLE}
  871. {$INCLUDE unicodedata_le.inc}
  872. {$ENDIF ENDIAN_LITTLE}
  873. {$IFDEF ENDIAN_BIG}
  874. {$INCLUDE unicodedata_be.inc}
  875. {$ENDIF ENDIAN_BIG}
  876. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  877. begin
  878. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  879. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  880. end;
  881. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  882. begin
  883. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  884. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  885. end;
  886. function UnicodeIsSurrogatePair(
  887. const AHighSurrogate,
  888. ALowSurrogate : UnicodeChar
  889. ) : Boolean;
  890. begin
  891. Result :=
  892. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  893. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  894. ) and
  895. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  896. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  897. )
  898. end;
  899. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  900. begin
  901. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  902. (Word(AValue) <= HIGH_SURROGATE_END);
  903. end;
  904. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  905. begin
  906. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  907. (Word(AValue) <= LOW_SURROGATE_END);
  908. end;
  909. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  910. begin
  911. Result:=
  912. @UC_PROP_ARRAY[
  913. UC_TABLE_3[
  914. UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
  915. [lo(ACodePoint) shr 4]
  916. ][lo(ACodePoint) and $F]
  917. ]; {
  918. @UC_PROP_ARRAY[
  919. UC_TABLE_2[
  920. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  921. WordRec(ACodePoint).Lo
  922. ]
  923. ];}
  924. end;
  925. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  926. begin
  927. Result:=
  928. @UC_PROP_ARRAY[
  929. UCO_TABLE_3[
  930. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  931. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  932. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  933. ]; {
  934. Result:=
  935. @UC_PROP_ARRAY[
  936. UCO_TABLE_2[
  937. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  938. Word(ALowS) - LOW_SURROGATE_BEGIN
  939. ]
  940. ]; }
  941. end;
  942. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  943. var
  944. l, h : UnicodeChar;
  945. begin
  946. if (ACodePoint <= High(Word)) then
  947. exit(GetProps(Word(ACodePoint)));
  948. FromUCS4(ACodePoint,h,l);
  949. Result := GetProps(h,l);
  950. end;
  951. function UnicodeToUpper(
  952. const AString : UnicodeString;
  953. const AIgnoreInvalidSequence : Boolean;
  954. out AResultString : UnicodeString
  955. ) : Integer;
  956. var
  957. i, c : SizeInt;
  958. pp, pr : PUnicodeChar;
  959. pu : PUC_Prop;
  960. locIsSurrogate : Boolean;
  961. r : UnicodeString;
  962. begin
  963. c := Length(AString);
  964. SetLength(r,2*c);
  965. if (c > 0) then begin
  966. pp := @AString[1];
  967. pr := @r[1];
  968. i := 1;
  969. while (i <= c) do begin
  970. pu := GetProps(Word(pp^));
  971. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  972. if locIsSurrogate then begin
  973. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  974. if AIgnoreInvalidSequence then begin
  975. pr^ := pp^;
  976. Inc(pp);
  977. Inc(pr);
  978. Inc(i);
  979. Continue;
  980. end;
  981. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  982. end;
  983. pu := GetProps(pp^,AString[i+1]);
  984. end;
  985. if (pu^.SimpleUpperCase = 0) then begin
  986. pr^ := pp^;
  987. if locIsSurrogate then begin
  988. Inc(pp);
  989. Inc(pr);
  990. Inc(i);
  991. pr^ := pp^;
  992. end;
  993. end else begin
  994. if (pu^.SimpleUpperCase <= $FFFF) then begin
  995. pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
  996. end else begin
  997. FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  998. Inc(pr);
  999. end;
  1000. if locIsSurrogate then begin
  1001. Inc(pp);
  1002. Inc(i);
  1003. end;
  1004. end;
  1005. Inc(pp);
  1006. Inc(pr);
  1007. Inc(i);
  1008. end;
  1009. Dec(pp);
  1010. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1011. SetLength(r,i);
  1012. AResultString := r;
  1013. end;
  1014. Result := 0;
  1015. end;
  1016. function UnicodeToLower(
  1017. const AString : UnicodeString;
  1018. const AIgnoreInvalidSequence : Boolean;
  1019. out AResultString : UnicodeString
  1020. ) : Integer;
  1021. var
  1022. i, c : SizeInt;
  1023. pp, pr : PUnicodeChar;
  1024. pu : PUC_Prop;
  1025. locIsSurrogate : Boolean;
  1026. r : UnicodeString;
  1027. begin
  1028. c := Length(AString);
  1029. SetLength(r,2*c);
  1030. if (c > 0) then begin
  1031. pp := @AString[1];
  1032. pr := @r[1];
  1033. i := 1;
  1034. while (i <= c) do begin
  1035. pu := GetProps(Word(pp^));
  1036. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1037. if locIsSurrogate then begin
  1038. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  1039. if AIgnoreInvalidSequence then begin
  1040. pr^ := pp^;
  1041. Inc(pp);
  1042. Inc(pr);
  1043. Inc(i);
  1044. Continue;
  1045. end;
  1046. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  1047. end;
  1048. pu := GetProps(pp^,AString[i+1]);
  1049. end;
  1050. if (pu^.SimpleLowerCase = 0) then begin
  1051. pr^ := pp^;
  1052. if locIsSurrogate then begin
  1053. Inc(pp);
  1054. Inc(pr);
  1055. Inc(i);
  1056. pr^ := pp^;
  1057. end;
  1058. end else begin
  1059. if (pu^.SimpleLowerCase <= $FFFF) then begin
  1060. pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
  1061. end else begin
  1062. FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  1063. Inc(pr);
  1064. end;
  1065. if locIsSurrogate then begin
  1066. Inc(pp);
  1067. Inc(i);
  1068. end;
  1069. end;
  1070. Inc(pp);
  1071. Inc(pr);
  1072. Inc(i);
  1073. end;
  1074. Dec(pp);
  1075. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1076. SetLength(r,i);
  1077. AResultString := r;
  1078. end;
  1079. Result := 0;
  1080. end;
  1081. //----------------------------------------------------------------------
  1082. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  1083. const
  1084. SBase = $AC00;
  1085. LBase = $1100;
  1086. VBase = $1161;
  1087. TBase = $11A7;
  1088. LCount = 19;
  1089. VCount = 21;
  1090. TCount = 28;
  1091. NCount = VCount * TCount; // 588
  1092. SCount = LCount * NCount; // 11172
  1093. var
  1094. SIndex, L, V, T : Integer;
  1095. begin
  1096. SIndex := AChar - SBase;
  1097. if (SIndex < 0) or (SIndex >= SCount) then begin
  1098. ABuffer^ := AChar;
  1099. exit(1);
  1100. end;
  1101. L := LBase + SIndex div NCount;
  1102. V := VBase + (SIndex mod NCount) div TCount;
  1103. T := TBase + SIndex mod TCount;
  1104. ABuffer[0] := L;
  1105. ABuffer[1] := V;
  1106. Result := 2;
  1107. if (T <> TBase) then begin
  1108. ABuffer[2] := T;
  1109. Inc(Result);
  1110. end;
  1111. end;
  1112. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  1113. var
  1114. locStack : array[0..23] of Cardinal;
  1115. locStackIdx : Integer;
  1116. ResultBuffer : array[0..23] of Cardinal;
  1117. ResultIdx : Integer;
  1118. procedure AddCompositionToStack(const AIndex : Integer);
  1119. var
  1120. pdecIdx : ^TDecompositionIndexRec;
  1121. k, kc : Integer;
  1122. pu : ^UInt24;
  1123. begin
  1124. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  1125. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  1126. kc := pdecIdx^.Length;
  1127. Inc(pu,kc);
  1128. for k := 1 to kc do begin
  1129. Dec(pu);
  1130. locStack[locStackIdx + k] := pu^;
  1131. end;
  1132. locStackIdx := locStackIdx + kc;
  1133. end;
  1134. procedure AddResult(const AChar : Cardinal);inline;
  1135. begin
  1136. Inc(ResultIdx);
  1137. ResultBuffer[ResultIdx] := AChar;
  1138. end;
  1139. function PopStack() : Cardinal;inline;
  1140. begin
  1141. Result := locStack[locStackIdx];
  1142. Dec(locStackIdx);
  1143. end;
  1144. var
  1145. cu : Cardinal;
  1146. decIdx : SmallInt;
  1147. locIsWord : Boolean;
  1148. i : Integer;
  1149. p : PUnicodeChar;
  1150. begin
  1151. ResultIdx := -1;
  1152. locStackIdx := -1;
  1153. AddCompositionToStack(ADecomposeIndex);
  1154. while (locStackIdx >= 0) do begin
  1155. cu := PopStack();
  1156. locIsWord := (cu <= MAX_WORD);
  1157. if locIsWord then
  1158. decIdx := GetProps(Word(cu))^.DecompositionID
  1159. else
  1160. decIdx := GetProps(cu)^.DecompositionID;
  1161. if (decIdx = -1) then
  1162. AddResult(cu)
  1163. else
  1164. AddCompositionToStack(decIdx);
  1165. end;
  1166. p := ABuffer;
  1167. Result := 0;
  1168. for i := 0 to ResultIdx do begin
  1169. cu := ResultBuffer[i];
  1170. if (cu <= MAX_WORD) then begin
  1171. p[0] := UnicodeChar(Word(cu));
  1172. Inc(p);
  1173. end else begin
  1174. FromUCS4(cu,p[0],p[1]);
  1175. Inc(p,2);
  1176. Inc(Result);
  1177. end;
  1178. end;
  1179. Result := Result + ResultIdx + 1;
  1180. end;
  1181. procedure CanonicalOrder(var AString : UnicodeString);
  1182. begin
  1183. CanonicalOrder(@AString[1],Length(AString));
  1184. end;
  1185. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  1186. var
  1187. i, c : SizeInt;
  1188. p, q : PUnicodeChar;
  1189. locIsSurrogateP, locIsSurrogateQ : Boolean;
  1190. procedure Swap();
  1191. var
  1192. t, t1 : UnicodeChar;
  1193. begin
  1194. if not locIsSurrogateP then begin
  1195. if not locIsSurrogateQ then begin
  1196. t := p^;
  1197. p^ := q^;
  1198. q^ := t;
  1199. exit;
  1200. end;
  1201. t := p^;
  1202. p[0] := q[0];
  1203. p[1] := q[1];
  1204. q[1] := t;
  1205. exit;
  1206. end;
  1207. if not locIsSurrogateQ then begin
  1208. t := q[0];
  1209. p[2] := p[1];
  1210. p[1] := p[0];
  1211. p[0] := t;
  1212. exit;
  1213. end;
  1214. t := p[0];
  1215. t1 := p[1];
  1216. p[0] := q[0];
  1217. p[1] := q[1];
  1218. q[0] := t;
  1219. q[1] := t1;
  1220. end;
  1221. var
  1222. pu : PUC_Prop;
  1223. cccp, cccq : Byte;
  1224. begin
  1225. c := ALength;
  1226. if (c < 2) then
  1227. exit;
  1228. p := AStr;
  1229. i := 1;
  1230. while (i < c) do begin
  1231. pu := GetProps(Word(p^));
  1232. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  1233. if locIsSurrogateP then begin
  1234. if (i = (c - 1)) then
  1235. Break;
  1236. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  1237. Inc(p);
  1238. Inc(i);
  1239. Continue;
  1240. end;
  1241. pu := GetProps(p[0],p[1]);
  1242. end;
  1243. if (pu^.CCC > 0) then begin
  1244. cccp := pu^.CCC;
  1245. if locIsSurrogateP then
  1246. q := p + 2
  1247. else
  1248. q := p + 1;
  1249. pu := GetProps(Word(q^));
  1250. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  1251. if locIsSurrogateQ then begin
  1252. if (i = c) then
  1253. Break;
  1254. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  1255. Inc(p);
  1256. Inc(i);
  1257. Continue;
  1258. end;
  1259. pu := GetProps(q[0],q[1]);
  1260. end;
  1261. cccq := pu^.CCC;
  1262. if (cccq > 0) and (cccp > cccq) then begin
  1263. Swap();
  1264. if (i > 1) then begin
  1265. Dec(p);
  1266. Dec(i);
  1267. pu := GetProps(Word(p^));
  1268. if (pu^.Category = UGC_Surrogate) then begin
  1269. if (i > 1) then begin
  1270. Dec(p);
  1271. Dec(i);
  1272. end;
  1273. end;
  1274. Continue;
  1275. end;
  1276. end;
  1277. end;
  1278. if locIsSurrogateP then begin
  1279. Inc(p);
  1280. Inc(i);
  1281. end;
  1282. Inc(p);
  1283. Inc(i);
  1284. end;
  1285. end;
  1286. //Canonical Decomposition
  1287. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  1288. begin
  1289. Result := NormalizeNFD(@AString[1],Length(AString));
  1290. end;
  1291. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  1292. const MAX_EXPAND = 3;
  1293. var
  1294. i, c, kc, k : SizeInt;
  1295. pp, pr : PUnicodeChar;
  1296. pu : PUC_Prop;
  1297. locIsSurrogate : Boolean;
  1298. cpArray : array[0..7] of Cardinal;
  1299. cp : Cardinal;
  1300. begin
  1301. c := ALength;
  1302. SetLength(Result,(MAX_EXPAND*c));
  1303. if (c > 0) then begin
  1304. pp := AStr;
  1305. pr := @Result[1];
  1306. i := 1;
  1307. while (i <= c) do begin
  1308. pu := GetProps(Word(pp^));
  1309. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1310. if locIsSurrogate then begin
  1311. if (i = c) then
  1312. Break;
  1313. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  1314. pr^ := pp^;
  1315. Inc(pp);
  1316. Inc(pr);
  1317. Inc(i);
  1318. Continue;
  1319. end;
  1320. pu := GetProps(pp[0],pp[1]);
  1321. end;
  1322. if pu^.HangulSyllable then begin
  1323. if locIsSurrogate then begin
  1324. cp := ToUCS4(pp[0],pp[1]);
  1325. Inc(pp);
  1326. Inc(i);
  1327. end else begin
  1328. cp := Word(pp^);
  1329. end;
  1330. kc := DecomposeHangul(cp,@cpArray[0]);
  1331. for k := 0 to kc - 1 do begin
  1332. if (cpArray[k] <= MAX_WORD) then begin
  1333. pr^ := UnicodeChar(Word(cpArray[k]));
  1334. pr := pr + 1;
  1335. end else begin
  1336. FromUCS4(cpArray[k],pr[0],pr[1]);
  1337. pr := pr + 2;
  1338. end;
  1339. end;
  1340. if (kc > 0) then
  1341. Dec(pr);
  1342. end else begin
  1343. if (pu^.DecompositionID = -1) then begin
  1344. pr^ := pp^;
  1345. if locIsSurrogate then begin
  1346. Inc(pp);
  1347. Inc(pr);
  1348. Inc(i);
  1349. pr^ := pp^;
  1350. end;
  1351. end else begin
  1352. k := Decompose(pu^.DecompositionID,pr);
  1353. pr := pr + (k - 1);
  1354. if locIsSurrogate then begin
  1355. Inc(pp);
  1356. Inc(i);
  1357. end;
  1358. end;
  1359. end;
  1360. Inc(pp);
  1361. Inc(pr);
  1362. Inc(i);
  1363. end;
  1364. Dec(pp);
  1365. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  1366. SetLength(Result,i);
  1367. CanonicalOrder(@Result[1],Length(Result));
  1368. end;
  1369. end;
  1370. { TUCA_PropItemContextTreeNodeRec }
  1371. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  1372. begin
  1373. if (Self.Left = 0) then
  1374. Result := nil
  1375. else
  1376. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  1377. end;
  1378. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  1379. begin
  1380. if (Self.Right = 0) then
  1381. Result := nil
  1382. else
  1383. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  1384. end;
  1385. { TUCA_PropItemContextRec }
  1386. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  1387. begin
  1388. Result := PUInt24(
  1389. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  1390. SizeOf(Self.WeightCount)
  1391. );
  1392. end;
  1393. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  1394. begin
  1395. Result := PUCA_PropWeights(
  1396. PtrUInt(@Self) +
  1397. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  1398. (Self.CodePointCount*SizeOf(UInt24))
  1399. );
  1400. end;
  1401. { TUCA_PropItemContextTreeRec }
  1402. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  1403. begin
  1404. if (Size = 0) then
  1405. Result := nil
  1406. else
  1407. Result := PUCA_PropItemContextTreeNodeRec(
  1408. PtrUInt(
  1409. PtrUInt(@Self) + SizeOf(UInt24){Size}
  1410. )
  1411. );
  1412. end;
  1413. function CompareCodePoints(
  1414. A : PUInt24; LA : Integer;
  1415. B : PUInt24; LB : Integer
  1416. ) : Integer;
  1417. var
  1418. i, hb : Integer;
  1419. begin
  1420. if (A = B) then
  1421. exit(0);
  1422. Result := 1;
  1423. hb := LB - 1;
  1424. for i := 0 to LA - 1 do begin
  1425. if (i > hb) then
  1426. exit;
  1427. if (A[i] < B[i]) then
  1428. exit(-1);
  1429. if (A[i] > B[i]) then
  1430. exit(1);
  1431. end;
  1432. if (LA = LB) then
  1433. exit(0);
  1434. exit(-1);
  1435. end;
  1436. function TUCA_PropItemContextTreeRec.Find(
  1437. const AChars : PUInt24;
  1438. const ACharCount : Integer;
  1439. out ANode : PUCA_PropItemContextTreeNodeRec
  1440. ) : Boolean;
  1441. var
  1442. t : PUCA_PropItemContextTreeNodeRec;
  1443. begin
  1444. t := Data;
  1445. while (t <> nil) do begin
  1446. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  1447. 0 : Break;
  1448. -1 : t := t^.GetLeftNode();
  1449. else
  1450. t := t^.GetRightNode();
  1451. end;
  1452. end;
  1453. Result := (t <> nil);
  1454. if Result then
  1455. ANode := t;
  1456. end;
  1457. { TUC_Prop }
  1458. function TUC_Prop.GetCategory: Byte;
  1459. begin
  1460. Result := Byte((CategoryData and Byte($F8)) shr 3);
  1461. end;
  1462. function TUC_Prop.GetNumericValue: Double;
  1463. begin
  1464. Result := UC_NUMERIC_ARRAY[NumericIndex];
  1465. end;
  1466. procedure TUC_Prop.SetCategory(AValue: Byte);
  1467. begin
  1468. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  1469. end;
  1470. function TUC_Prop.GetWhiteSpace: Boolean;
  1471. begin
  1472. Result := IsBitON(CategoryData,0);
  1473. end;
  1474. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  1475. begin
  1476. SetBit(CategoryData,0,AValue);
  1477. end;
  1478. function TUC_Prop.GetHangulSyllable: Boolean;
  1479. begin
  1480. Result := IsBitON(CategoryData,1);
  1481. end;
  1482. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  1483. begin
  1484. SetBit(CategoryData,1,AValue);
  1485. end;
  1486. { TUCA_DataBook }
  1487. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  1488. begin
  1489. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  1490. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  1491. end;
  1492. { TUCA_PropItemRec }
  1493. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  1494. begin
  1495. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  1496. end;
  1497. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  1498. begin
  1499. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  1500. end;
  1501. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  1502. begin
  1503. if HasCodePoint() then begin
  1504. if Contextual then
  1505. Result := Unaligned(
  1506. PUInt24(
  1507. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  1508. Cardinal(GetContext()^.Size)
  1509. )^
  1510. )
  1511. else
  1512. Result := Unaligned(PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^)
  1513. end else begin
  1514. {$ifdef uni_debug}
  1515. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  1516. {$else uni_debug}
  1517. Result := ZERO_UINT24;
  1518. {$endif uni_debug}
  1519. end
  1520. end;
  1521. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  1522. begin
  1523. Result := IsBitON(Flags,FLAG_CODEPOINT);
  1524. end;
  1525. function TUCA_PropItemRec.IsValid() : Boolean;
  1526. begin
  1527. Result := IsBitON(Flags,FLAG_VALID);
  1528. end;
  1529. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  1530. begin
  1531. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1532. end;}
  1533. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  1534. var
  1535. c : Integer;
  1536. p : PByte;
  1537. pd : PUCA_PropWeights;
  1538. begin
  1539. c := WeightLength;
  1540. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1541. pd := ADest;
  1542. pd^.Weights[0] := Unaligned(PWord(p)^);
  1543. p := p + 2;
  1544. if not IsWeightCompress_1() then begin
  1545. pd^.Weights[1] := Unaligned(PWord(p)^);
  1546. p := p + 2;
  1547. end else begin
  1548. pd^.Weights[1] := p^;
  1549. p := p + 1;
  1550. end;
  1551. if not IsWeightCompress_2() then begin
  1552. pd^.Weights[2] := Unaligned(PWord(p)^);
  1553. p := p + 2;
  1554. end else begin
  1555. pd^.Weights[2] := p^;
  1556. p := p + 1;
  1557. end;
  1558. if (c > 1) then
  1559. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  1560. end;
  1561. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  1562. begin
  1563. Result := SizeOf(TUCA_PropItemRec);
  1564. if (WeightLength > 0) then begin
  1565. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  1566. if IsWeightCompress_1() then
  1567. Result := Result - 1;
  1568. if IsWeightCompress_2() then
  1569. Result := Result - 1;
  1570. end;
  1571. if HasCodePoint() then
  1572. Result := Result + SizeOf(UInt24);
  1573. if Contextual then
  1574. Result := Result + Cardinal(GetContext()^.Size);
  1575. end;
  1576. function TUCA_PropItemRec.GetContextual: Boolean;
  1577. begin
  1578. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  1579. end;
  1580. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  1581. var
  1582. p : PtrUInt;
  1583. begin
  1584. if not Contextual then
  1585. exit(nil);
  1586. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  1587. if IsBitON(Flags,FLAG_CODEPOINT) then
  1588. p := p + SizeOf(UInt24);
  1589. Result := PUCA_PropItemContextTreeRec(p);
  1590. end;
  1591. function TUCA_PropItemRec.IsDeleted() : Boolean;
  1592. begin
  1593. Result := IsBitON(Flags,FLAG_DELETION);
  1594. end;
  1595. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  1596. var
  1597. i : Cardinal;
  1598. begin
  1599. if (ABook^.BMP_Table2 = nil) then
  1600. exit(nil);
  1601. i := ABook^.BMP_Table2[
  1602. (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
  1603. Lo(Word(AChar))
  1604. ];
  1605. if (i > 0) then
  1606. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1607. else
  1608. Result := nil;
  1609. end;
  1610. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  1611. var
  1612. i : Cardinal;
  1613. begin
  1614. if (ABook^.OBMP_Table2 = nil) then
  1615. exit(nil);
  1616. i := ABook^.OBMP_Table2[
  1617. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1618. Word(ALowS) - LOW_SURROGATE_BEGIN
  1619. ];
  1620. if (i > 0) then
  1621. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1622. else
  1623. Result := nil;
  1624. end;
  1625. {$include weight_derivation.inc}
  1626. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  1627. var
  1628. bb : TUCASortKey;
  1629. begin
  1630. SetLength(bb,Length(B));
  1631. if (Length(bb) > 0) then
  1632. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  1633. Result := CompareSortKey(A,bb);
  1634. end;
  1635. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  1636. var
  1637. i, hb : Integer;
  1638. begin
  1639. if (Pointer(A) = Pointer(B)) then
  1640. exit(0);
  1641. Result := 1;
  1642. hb := Length(B) - 1;
  1643. for i := 0 to Length(A) - 1 do begin
  1644. if (i > hb) then
  1645. exit;
  1646. if (A[i] < B[i]) then
  1647. exit(-1);
  1648. if (A[i] > B[i]) then
  1649. exit(1);
  1650. end;
  1651. if (Length(A) = Length(B)) then
  1652. exit(0);
  1653. exit(-1);
  1654. end;
  1655. type
  1656. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  1657. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1658. var
  1659. r : TUCASortKey;
  1660. i, c, k, ral, levelCount : Integer;
  1661. pce : PUCA_PropWeights;
  1662. begin
  1663. c := Length(ACEList);
  1664. if (c = 0) then
  1665. exit(nil);
  1666. levelCount := Length(ACEList[0].Weights);
  1667. SetLength(r,(levelCount*c + levelCount));
  1668. ral := 0;
  1669. for i := 0 to levelCount - 1 do begin
  1670. if not ACollation^.Backwards[i] then begin
  1671. pce := @ACEList[0];
  1672. for k := 0 to c - 1 do begin
  1673. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1674. r[ral] := pce^.Weights[i];
  1675. ral := ral + 1;
  1676. end;
  1677. pce := pce + 1;
  1678. end;
  1679. end else begin
  1680. pce := @ACEList[c-1];
  1681. for k := 0 to c - 1 do begin
  1682. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1683. r[ral] := pce^.Weights[i];
  1684. ral := ral + 1;
  1685. end;
  1686. pce := pce - 1;
  1687. end;
  1688. end;
  1689. r[ral] := 0;
  1690. ral := ral + 1;
  1691. end;
  1692. ral := ral - 1;
  1693. SetLength(r,ral);
  1694. Result := r;
  1695. end;
  1696. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1697. var
  1698. r : TUCASortKey;
  1699. i, c, k, ral, levelCount : Integer;
  1700. pce : PUCA_PropWeights;
  1701. begin
  1702. c := Length(ACEList);
  1703. if (c = 0) then
  1704. exit(nil);
  1705. levelCount := Length(ACEList[0].Weights);
  1706. SetLength(r,(levelCount*c + levelCount));
  1707. ral := 0;
  1708. for i := 0 to levelCount - 1 do begin
  1709. if not ACollation^.Backwards[i] then begin
  1710. pce := @ACEList[0];
  1711. for k := 0 to c - 1 do begin
  1712. if (pce^.Weights[i] <> 0) then begin
  1713. r[ral] := pce^.Weights[i];
  1714. ral := ral + 1;
  1715. end;
  1716. pce := pce + 1;
  1717. end;
  1718. end else begin
  1719. pce := @ACEList[c-1];
  1720. for k := 0 to c - 1 do begin
  1721. if (pce^.Weights[i] <> 0) then begin
  1722. r[ral] := pce^.Weights[i];
  1723. ral := ral + 1;
  1724. end;
  1725. pce := pce - 1;
  1726. end;
  1727. end;
  1728. r[ral] := 0;
  1729. ral := ral + 1;
  1730. end;
  1731. ral := ral - 1;
  1732. SetLength(r,ral);
  1733. Result := r;
  1734. end;
  1735. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1736. var
  1737. r : TUCASortKey;
  1738. i, c, k, ral, levelCount : Integer;
  1739. pce : PUCA_PropWeights;
  1740. variableState : Boolean;
  1741. begin
  1742. c := Length(ACEList);
  1743. if (c = 0) then
  1744. exit(nil);
  1745. levelCount := Length(ACEList[0].Weights);
  1746. SetLength(r,(levelCount*c + levelCount));
  1747. ral := 0;
  1748. for i := 0 to levelCount - 1 do begin
  1749. if not ACollation^.Backwards[i] then begin
  1750. variableState := False;
  1751. pce := @ACEList[0];
  1752. for k := 0 to c - 1 do begin
  1753. if not ACollation^.IsVariable(pce) then begin
  1754. if (pce^.Weights[0] <> 0) then
  1755. variableState := False;
  1756. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1757. r[ral] := pce^.Weights[i];
  1758. ral := ral + 1;
  1759. end;
  1760. end else begin
  1761. variableState := True;
  1762. end;
  1763. pce := pce + 1;
  1764. end;
  1765. end else begin
  1766. pce := @ACEList[c-1];
  1767. for k := 0 to c - 1 do begin
  1768. if not ACollation^.IsVariable(pce) then begin
  1769. if (pce^.Weights[0] <> 0) then
  1770. variableState := False;
  1771. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1772. r[ral] := pce^.Weights[i];
  1773. ral := ral + 1;
  1774. end;
  1775. end else begin
  1776. variableState := True;
  1777. end;
  1778. pce := pce - 1;
  1779. end;
  1780. end;
  1781. r[ral] := 0;
  1782. ral := ral + 1;
  1783. end;
  1784. ral := ral - 1;
  1785. SetLength(r,ral);
  1786. Result := r;
  1787. end;
  1788. function FormKeyShiftedTrimmed(
  1789. const ACEList : TUCA_PropWeightsArray;
  1790. const ACollation : PUCA_DataBook
  1791. ) : TUCASortKey;
  1792. var
  1793. i : Integer;
  1794. p : ^TUCASortKeyItem;
  1795. begin
  1796. Result := FormKeyShifted(ACEList,ACollation);
  1797. i := Length(Result) - 1;
  1798. if (i >= 0) then begin
  1799. p := @Result[i];
  1800. while (i >= 0) do begin
  1801. if (p^ <> $FFFF) then
  1802. Break;
  1803. Dec(i);
  1804. Dec(p);
  1805. end;
  1806. if ((i+1) < Length(Result)) then
  1807. SetLength(Result,(i+1));
  1808. end;
  1809. end;
  1810. function FindChild(
  1811. const ACodePoint : Cardinal;
  1812. const AParent : PUCA_PropItemRec
  1813. ) : PUCA_PropItemRec;inline;
  1814. var
  1815. k : Integer;
  1816. begin
  1817. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  1818. for k := 0 to AParent^.ChildCount - 1 do begin
  1819. if (ACodePoint = Result^.CodePoint) then
  1820. exit;
  1821. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  1822. end;
  1823. Result := nil;
  1824. end;
  1825. function ComputeSortKey(
  1826. const AString : UnicodeString;
  1827. const ACollation : PUCA_DataBook
  1828. ) : TUCASortKey;
  1829. begin
  1830. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  1831. end;
  1832. function ComputeRawSortKey(
  1833. const AStr : PUnicodeChar;
  1834. const ALength : SizeInt;
  1835. const ACollation : PUCA_DataBook
  1836. ) : TUCA_PropWeightsArray;
  1837. var
  1838. r : TUCA_PropWeightsArray;
  1839. ral {used length of "r"}: Integer;
  1840. rl {capacity of "r"} : Integer;
  1841. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1842. begin
  1843. if (rl < AMinGrow) then
  1844. rl := rl + AMinGrow
  1845. else
  1846. rl := 2 * rl;
  1847. SetLength(r,rl);
  1848. end;
  1849. var
  1850. i : Integer;
  1851. s : UnicodeString;
  1852. ps : PUnicodeChar;
  1853. cp : Cardinal;
  1854. cl : PUCA_DataBook;
  1855. pp : PUCA_PropItemRec;
  1856. ppLevel : Byte;
  1857. removedCharIndex : array of DWord;
  1858. removedCharIndexLength : DWord;
  1859. locHistory : array[0..24] of record
  1860. i : Integer;
  1861. cl : PUCA_DataBook;
  1862. pp : PUCA_PropItemRec;
  1863. ppLevel : Byte;
  1864. cp : Cardinal;
  1865. removedCharIndexLength : DWord;
  1866. end;
  1867. locHistoryTop : Integer;
  1868. suppressState : record
  1869. cl : PUCA_DataBook;
  1870. CharCount : Integer;
  1871. end;
  1872. LastKeyOwner : record
  1873. Length : Integer;
  1874. Chars : array[0..24] of UInt24;
  1875. end;
  1876. procedure SaveKeyOwner();
  1877. var
  1878. k : Integer;
  1879. kppLevel : Byte;
  1880. begin
  1881. k := 0;
  1882. kppLevel := High(Byte);
  1883. while (k <= locHistoryTop) do begin
  1884. if (kppLevel <> locHistory[k].ppLevel) then begin
  1885. LastKeyOwner.Chars[k] := locHistory[k].cp;
  1886. kppLevel := locHistory[k].ppLevel;
  1887. end;
  1888. k := k + 1;
  1889. end;
  1890. if (k = 0) or (kppLevel <> ppLevel) then begin
  1891. LastKeyOwner.Chars[k] := cp;
  1892. k := k + 1;
  1893. end;
  1894. LastKeyOwner.Length := k;
  1895. end;
  1896. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1897. begin
  1898. SaveKeyOwner();
  1899. if ((ral + AItem^.WeightLength) > rl) then
  1900. GrowKey(AItem^.WeightLength);
  1901. AItem^.GetWeightArray(@r[ral]);
  1902. ral := ral + AItem^.WeightLength;
  1903. end;
  1904. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  1905. begin
  1906. if ((ral + AItem^.WeightCount) > rl) then
  1907. GrowKey(AItem^.WeightCount);
  1908. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  1909. ral := ral + AItem^.WeightCount;
  1910. end;
  1911. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1912. begin
  1913. SaveKeyOwner();
  1914. if ((ral + 2) > rl) then
  1915. GrowKey();
  1916. DeriveWeight(ACodePoint,@r[ral]);
  1917. ral := ral + 2;
  1918. end;
  1919. procedure RecordDeletion();inline;
  1920. begin
  1921. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  1922. if (suppressState.cl = nil) or
  1923. (suppressState.CharCount > ppLevel)
  1924. then begin
  1925. suppressState.cl := cl;
  1926. suppressState.CharCount := ppLevel;
  1927. end;
  1928. end;
  1929. end;
  1930. procedure RecordStep();inline;
  1931. begin
  1932. Inc(locHistoryTop);
  1933. locHistory[locHistoryTop].i := i;
  1934. locHistory[locHistoryTop].cl := cl;
  1935. locHistory[locHistoryTop].pp := pp;
  1936. locHistory[locHistoryTop].ppLevel := ppLevel;
  1937. locHistory[locHistoryTop].cp := cp;
  1938. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1939. RecordDeletion();
  1940. end;
  1941. procedure ClearHistory();inline;
  1942. begin
  1943. locHistoryTop := -1;
  1944. end;
  1945. function HasHistory() : Boolean;inline;
  1946. begin
  1947. Result := (locHistoryTop >= 0);
  1948. end;
  1949. function GetHistoryLength() : Integer;inline;
  1950. begin
  1951. Result := (locHistoryTop + 1);
  1952. end;
  1953. procedure GoBack();inline;
  1954. begin
  1955. Assert(locHistoryTop >= 0);
  1956. i := locHistory[locHistoryTop].i;
  1957. cp := locHistory[locHistoryTop].cp;
  1958. cl := locHistory[locHistoryTop].cl;
  1959. pp := locHistory[locHistoryTop].pp;
  1960. ppLevel := locHistory[locHistoryTop].ppLevel;
  1961. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1962. ps := @s[i];
  1963. Dec(locHistoryTop);
  1964. end;
  1965. var
  1966. c : Integer;
  1967. lastUnblockedNonstarterCCC : Byte;
  1968. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1969. var
  1970. k : DWord;
  1971. pk : PUnicodeChar;
  1972. puk : PUC_Prop;
  1973. begin
  1974. k := AStartFrom;
  1975. if (k > c) then
  1976. exit(False);
  1977. if (removedCharIndexLength>0) and
  1978. (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0)
  1979. then begin
  1980. exit(False);
  1981. end;
  1982. {if (k = (i+1)) or
  1983. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1984. then
  1985. lastUnblockedNonstarterCCC := 0;}
  1986. pk := @s[k];
  1987. if UnicodeIsHighSurrogate(pk^) then begin
  1988. if (k = c) then
  1989. exit(False);
  1990. if UnicodeIsLowSurrogate(pk[1]) then
  1991. puk := GetProps(pk[0],pk[1])
  1992. else
  1993. puk := GetProps(Word(pk^));
  1994. end else begin
  1995. puk := GetProps(Word(pk^));
  1996. end;
  1997. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  1998. exit(False);
  1999. lastUnblockedNonstarterCCC := puk^.CCC;
  2000. Result := True;
  2001. end;
  2002. procedure RemoveChar(APos : Integer);inline;
  2003. begin
  2004. if (removedCharIndexLength >= Length(removedCharIndex)) then
  2005. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  2006. removedCharIndex[removedCharIndexLength] := APos;
  2007. Inc(removedCharIndexLength);
  2008. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  2009. if (removedCharIndexLength >= Length(removedCharIndex)) then
  2010. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  2011. removedCharIndex[removedCharIndexLength] := APos+1;
  2012. Inc(removedCharIndexLength);
  2013. end;
  2014. end;
  2015. procedure Inc_I();inline;
  2016. begin
  2017. if (removedCharIndexLength = 0) then begin
  2018. Inc(i);
  2019. Inc(ps);
  2020. exit;
  2021. end;
  2022. while True do begin
  2023. Inc(i);
  2024. Inc(ps);
  2025. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  2026. Break;
  2027. end;
  2028. end;
  2029. var
  2030. surrogateState : Boolean;
  2031. function MoveToNextChar() : Boolean;inline;
  2032. begin
  2033. Result := True;
  2034. if UnicodeIsHighSurrogate(ps[0]) then begin
  2035. if (i = c) then
  2036. exit(False);
  2037. if UnicodeIsLowSurrogate(ps[1]) then begin
  2038. surrogateState := True;
  2039. cp := ToUCS4(ps[0],ps[1]);
  2040. end else begin
  2041. surrogateState := False;
  2042. cp := Word(ps[0]);
  2043. end;
  2044. end else begin
  2045. surrogateState := False;
  2046. cp := Word(ps[0]);
  2047. end;
  2048. end;
  2049. procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
  2050. begin
  2051. cl := nil;
  2052. pp := nil;
  2053. ppLevel := 0;
  2054. if AClearSuppressInfo then begin
  2055. suppressState.cl := nil;
  2056. suppressState.CharCount := 0;
  2057. end;
  2058. end;
  2059. function FindPropUCA() : Boolean;
  2060. var
  2061. candidateCL : PUCA_DataBook;
  2062. begin
  2063. pp := nil;
  2064. if (cl = nil) then
  2065. candidateCL := ACollation
  2066. else
  2067. candidateCL := cl;
  2068. if surrogateState then begin
  2069. while (candidateCL <> nil) do begin
  2070. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  2071. if (pp <> nil) then
  2072. break;
  2073. candidateCL := candidateCL^.Base;
  2074. end;
  2075. end else begin
  2076. while (candidateCL <> nil) do begin
  2077. pp := GetPropUCA(ps[0],candidateCL);
  2078. if (pp <> nil) then
  2079. break;
  2080. candidateCL := candidateCL^.Base;
  2081. end;
  2082. end;
  2083. cl := candidateCL;
  2084. Result := (pp <> nil);
  2085. end;
  2086. procedure AddWeightsAndClear();inline;
  2087. var
  2088. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2089. begin
  2090. if (pp^.WeightLength > 0) then begin
  2091. AddWeights(pp);
  2092. end else
  2093. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2094. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2095. (ctxNode^.Data.WeightCount > 0)
  2096. then begin
  2097. AddContextWeights(@ctxNode^.Data);
  2098. end;
  2099. //AddWeights(pp);
  2100. ClearHistory();
  2101. ClearPP();
  2102. end;
  2103. procedure StartMatch();
  2104. procedure HandleLastChar();
  2105. var
  2106. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2107. begin
  2108. while True do begin
  2109. if pp^.IsValid() then begin
  2110. if (pp^.WeightLength > 0) then
  2111. AddWeights(pp)
  2112. else
  2113. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2114. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2115. (ctxNode^.Data.WeightCount > 0)
  2116. then
  2117. AddContextWeights(@ctxNode^.Data)
  2118. else
  2119. AddComputedWeights(cp){handle deletion of code point};
  2120. break;
  2121. end;
  2122. if (cl^.Base = nil) then begin
  2123. AddComputedWeights(cp);
  2124. break;
  2125. end;
  2126. cl := cl^.Base;
  2127. if not FindPropUCA() then begin
  2128. AddComputedWeights(cp);
  2129. break;
  2130. end;
  2131. end;
  2132. end;
  2133. var
  2134. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2135. begin
  2136. ppLevel := 0;
  2137. if not FindPropUCA() then begin
  2138. AddComputedWeights(cp);
  2139. ClearHistory();
  2140. ClearPP();
  2141. end else begin
  2142. if (i = c) then begin
  2143. HandleLastChar();
  2144. end else begin
  2145. if pp^.IsValid()then begin
  2146. if (pp^.ChildCount = 0) then begin
  2147. if (pp^.WeightLength > 0) then
  2148. AddWeights(pp)
  2149. else
  2150. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2151. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  2152. (tmpCtxNode^.Data.WeightCount > 0)
  2153. then
  2154. AddContextWeights(@tmpCtxNode^.Data)
  2155. else
  2156. AddComputedWeights(cp){handle deletion of code point};
  2157. ClearPP();
  2158. ClearHistory();
  2159. end else begin
  2160. RecordStep();
  2161. end
  2162. end else begin
  2163. if (pp^.ChildCount = 0) then begin
  2164. AddComputedWeights(cp);
  2165. ClearPP();
  2166. ClearHistory();
  2167. end else begin
  2168. RecordStep();
  2169. end;
  2170. end ;
  2171. end;
  2172. end;
  2173. end;
  2174. function TryPermutation() : Boolean;
  2175. var
  2176. kk : Integer;
  2177. b : Boolean;
  2178. puk : PUC_Prop;
  2179. ppk : PUCA_PropItemRec;
  2180. begin
  2181. Result := False;
  2182. puk := GetProps(cp);
  2183. if (puk^.CCC = 0) then
  2184. exit;
  2185. lastUnblockedNonstarterCCC := puk^.CCC;
  2186. if surrogateState then
  2187. kk := i + 2
  2188. else
  2189. kk := i + 1;
  2190. while IsUnblockedNonstarter(kk) do begin
  2191. b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
  2192. if b then
  2193. ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
  2194. else
  2195. ppk := FindChild(Word(s[kk]),pp);
  2196. if (ppk <> nil) then begin
  2197. pp := ppk;
  2198. RemoveChar(kk);
  2199. Inc(ppLevel);
  2200. RecordStep();
  2201. Result := True;
  2202. if (pp^.ChildCount = 0 ) then
  2203. Break;
  2204. end;
  2205. if b then
  2206. Inc(kk);
  2207. Inc(kk);
  2208. end;
  2209. end;
  2210. procedure AdvanceCharPos();inline;
  2211. begin
  2212. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  2213. Inc(i);
  2214. Inc(ps);
  2215. end;
  2216. Inc_I();
  2217. end;
  2218. var
  2219. ok : Boolean;
  2220. pp1 : PUCA_PropItemRec;
  2221. cltemp : PUCA_DataBook;
  2222. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2223. begin
  2224. if (ALength = 0) then
  2225. exit(nil);
  2226. c := ALength;
  2227. s := NormalizeNFD(AStr,c);
  2228. c := Length(s);
  2229. rl := 3*c;
  2230. SetLength(r,rl);
  2231. ral := 0;
  2232. ps := @s[1];
  2233. ClearPP();
  2234. locHistoryTop := -1;
  2235. removedCharIndexLength := 0;
  2236. FillByte(suppressState,SizeOf(suppressState),0);
  2237. LastKeyOwner.Length := 0;
  2238. i := 1;
  2239. while (i <= c) and MoveToNextChar() do begin
  2240. if (pp = nil) then begin // Start Matching
  2241. StartMatch();
  2242. end else begin
  2243. pp1 := FindChild(cp,pp);
  2244. if (pp1 <> nil) then begin
  2245. Inc(ppLevel);
  2246. pp := pp1;
  2247. if (pp^.ChildCount = 0) or (i = c) then begin
  2248. ok := False;
  2249. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2250. if (pp^.WeightLength > 0) then begin
  2251. AddWeightsAndClear();
  2252. ok := True;
  2253. end else
  2254. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2255. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2256. (ctxNode^.Data.WeightCount > 0)
  2257. then begin
  2258. AddContextWeights(@ctxNode^.Data);
  2259. ClearHistory();
  2260. ClearPP();
  2261. ok := True;
  2262. end
  2263. end;
  2264. if not ok then begin
  2265. RecordDeletion();
  2266. ok := False;
  2267. while HasHistory() do begin
  2268. GoBack();
  2269. if pp^.IsValid() and
  2270. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2271. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2272. )
  2273. then begin
  2274. AddWeightsAndClear();
  2275. ok := True;
  2276. Break;
  2277. end;
  2278. end;
  2279. if not ok then begin
  2280. cltemp := cl^.Base;
  2281. if (cltemp <> nil) then begin
  2282. ClearPP(False);
  2283. cl := cltemp;
  2284. Continue;
  2285. end;
  2286. end;
  2287. if not ok then begin
  2288. AddComputedWeights(cp);
  2289. ClearHistory();
  2290. ClearPP();
  2291. end;
  2292. end;
  2293. end else begin
  2294. RecordStep();
  2295. end;
  2296. end else begin
  2297. // permutations !
  2298. ok := False;
  2299. if TryPermutation() and pp^.IsValid() then begin
  2300. if (suppressState.CharCount = 0) then begin
  2301. AddWeightsAndClear();
  2302. Continue;
  2303. end;
  2304. while True do begin
  2305. if pp^.IsValid() and
  2306. (pp^.WeightLength > 0) and
  2307. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2308. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2309. )
  2310. then begin
  2311. AddWeightsAndClear();
  2312. ok := True;
  2313. break;
  2314. end;
  2315. if not HasHistory() then
  2316. break;
  2317. GoBack();
  2318. if (pp = nil) then
  2319. break;
  2320. end;
  2321. end;
  2322. if not ok then begin
  2323. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2324. if (pp^.WeightLength > 0) then begin
  2325. AddWeightsAndClear();
  2326. ok := True;
  2327. end else
  2328. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2329. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2330. (ctxNode^.Data.WeightCount > 0)
  2331. then begin
  2332. AddContextWeights(@ctxNode^.Data);
  2333. ClearHistory();
  2334. ClearPP();
  2335. ok := True;
  2336. end
  2337. end;
  2338. if ok then
  2339. Continue;
  2340. end;
  2341. if not ok then begin
  2342. if (cl^.Base <> nil) then begin
  2343. cltemp := cl^.Base;
  2344. while HasHistory() do
  2345. GoBack();
  2346. pp := nil;
  2347. ppLevel := 0;
  2348. cl := cltemp;
  2349. Continue;
  2350. end;
  2351. //walk back
  2352. ok := False;
  2353. while HasHistory() do begin
  2354. GoBack();
  2355. if pp^.IsValid() and
  2356. (pp^.WeightLength > 0) and
  2357. ( (suppressState.CharCount = 0) or
  2358. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2359. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2360. )
  2361. )
  2362. then begin
  2363. AddWeightsAndClear();
  2364. ok := True;
  2365. Break;
  2366. end;
  2367. end;
  2368. if ok then begin
  2369. AdvanceCharPos();
  2370. Continue;
  2371. end;
  2372. if (pp <> nil) then begin
  2373. AddComputedWeights(cp);
  2374. ClearHistory();
  2375. ClearPP();
  2376. end;
  2377. end;
  2378. end;
  2379. end;
  2380. if surrogateState then begin
  2381. Inc(ps);
  2382. Inc(i);
  2383. end;
  2384. //
  2385. Inc_I();
  2386. end;
  2387. SetLength(r,ral);
  2388. Result := r;
  2389. end;
  2390. type
  2391. TComputeKeyContext = record
  2392. Collation : PUCA_DataBook;
  2393. r : TUCA_PropWeightsArray;
  2394. ral {used length of "r"}: Integer;
  2395. rl {capacity of "r"} : Integer;
  2396. i : Integer;
  2397. s : UnicodeString;
  2398. ps : PUnicodeChar;
  2399. cp : Cardinal;
  2400. cl : PUCA_DataBook;
  2401. pp : PUCA_PropItemRec;
  2402. ppLevel : Byte;
  2403. removedCharIndex : array of DWord;
  2404. removedCharIndexLength : DWord;
  2405. locHistoryTop : Integer;
  2406. locHistory : array[0..24] of record
  2407. i : Integer;
  2408. cl : PUCA_DataBook;
  2409. pp : PUCA_PropItemRec;
  2410. ppLevel : Byte;
  2411. cp : Cardinal;
  2412. removedCharIndexLength : DWord;
  2413. end;
  2414. suppressState : record
  2415. cl : PUCA_DataBook;
  2416. CharCount : Integer;
  2417. end;
  2418. LastKeyOwner : record
  2419. Length : Integer;
  2420. Chars : array[0..24] of UInt24;
  2421. end;
  2422. c : Integer;
  2423. lastUnblockedNonstarterCCC : Byte;
  2424. surrogateState : Boolean;
  2425. Finished : Boolean;
  2426. end;
  2427. PComputeKeyContext = ^TComputeKeyContext;
  2428. procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
  2429. begin
  2430. AContext^.cl := nil;
  2431. AContext^.pp := nil;
  2432. AContext^.ppLevel := 0;
  2433. if AClearSuppressInfo then begin
  2434. AContext^.suppressState.cl := nil;
  2435. AContext^.suppressState.CharCount := 0;
  2436. end;
  2437. end;
  2438. procedure InitContext(
  2439. AContext : PComputeKeyContext;
  2440. const AStr : PUnicodeChar;
  2441. const ALength : SizeInt;
  2442. const ACollation : PUCA_DataBook
  2443. );
  2444. begin
  2445. AContext^.Collation := ACollation;
  2446. AContext^.c := ALength;
  2447. AContext^.s := NormalizeNFD(AStr,AContext^.c);
  2448. AContext^.c := Length(AContext^.s);
  2449. AContext^.rl := 3*AContext^.c;
  2450. SetLength(AContext^.r,AContext^.rl);
  2451. AContext^.ral := 0;
  2452. AContext^.ps := @AContext^.s[1];
  2453. ClearPP(AContext);
  2454. AContext^.locHistoryTop := -1;
  2455. AContext^.removedCharIndexLength := 0;
  2456. FillByte(AContext^.suppressState,SizeOf(AContext^.suppressState),0);
  2457. AContext^.LastKeyOwner.Length := 0;
  2458. AContext^.i := 1;
  2459. AContext^.Finished := False;
  2460. end;
  2461. function FormKey(
  2462. const AWeightArray : TUCA_PropWeightsArray;
  2463. const ACollation : PUCA_DataBook
  2464. ) : TUCASortKey;inline;
  2465. begin
  2466. case ACollation.VariableWeight of
  2467. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
  2468. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
  2469. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
  2470. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
  2471. else
  2472. Result := FormKeyShifted(AWeightArray,ACollation);
  2473. end;
  2474. end;
  2475. function ComputeRawSortKeyNextItem(
  2476. const AContext : PComputeKeyContext
  2477. ) : Boolean;forward;
  2478. function IncrementalCompareString_NonIgnorable(
  2479. const AStrA : PUnicodeChar;
  2480. const ALengthA : SizeInt;
  2481. const AStrB : PUnicodeChar;
  2482. const ALengthB : SizeInt;
  2483. const ACollation : PUCA_DataBook
  2484. ) : Integer;
  2485. var
  2486. ctxA, ctxB : TComputeKeyContext;
  2487. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  2488. keyIndexB : Integer;
  2489. keyA, keyB : TUCASortKey;
  2490. begin
  2491. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  2492. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  2493. (ALengthA = ALengthB)
  2494. )
  2495. then
  2496. exit(0);
  2497. if (ALengthA = 0) then
  2498. exit(-1);
  2499. if (ALengthB = 0) then
  2500. exit(1);
  2501. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  2502. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  2503. lastKeyIndexA := -1;
  2504. keyIndexA := -1;
  2505. lengthMaxA := 0;
  2506. keyIndexB := -1;
  2507. while True do begin
  2508. if not ComputeRawSortKeyNextItem(@ctxA) then
  2509. Break;
  2510. if (ctxA.ral = lengthMaxA) then
  2511. Continue;
  2512. lengthMaxA := ctxA.ral;
  2513. keyIndexA := lastKeyIndexA + 1;
  2514. while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
  2515. Inc(keyIndexA);
  2516. end;
  2517. if (keyIndexA = lengthMaxA) then begin
  2518. lastKeyIndexA := keyIndexA-1;
  2519. Continue;
  2520. end;
  2521. while (keyIndexA < lengthMaxA) do begin
  2522. if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
  2523. Inc(keyIndexA);
  2524. Continue;
  2525. end;
  2526. Inc(keyIndexB);
  2527. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  2528. if (ctxB.ral <= keyIndexB) then begin
  2529. if not ComputeRawSortKeyNextItem(@ctxB) then
  2530. Break;
  2531. Continue;
  2532. end;
  2533. Inc(keyIndexB);
  2534. end;
  2535. if (ctxB.ral <= keyIndexB) then
  2536. exit(1);
  2537. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  2538. exit(1);
  2539. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  2540. exit(-1);
  2541. Inc(keyIndexA);
  2542. end;
  2543. lastKeyIndexA := keyIndexA - 1;
  2544. end;
  2545. //Key(A) is completed !
  2546. Inc(keyIndexB);
  2547. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  2548. if (ctxB.ral <= keyIndexB) then begin
  2549. if not ComputeRawSortKeyNextItem(@ctxB) then
  2550. Break;
  2551. Continue;
  2552. end;
  2553. Inc(keyIndexB);
  2554. end;
  2555. if (ctxB.ral > keyIndexB) then begin
  2556. //B has at least one more primary weight that A
  2557. exit(-1);
  2558. end;
  2559. while ComputeRawSortKeyNextItem(@ctxB) do begin
  2560. //
  2561. end;
  2562. //Key(B) is completed !
  2563. keyA := FormKey(ctxA.r,ctxA.Collation);
  2564. keyB := FormKey(ctxB.r,ctxB.Collation);
  2565. Result := CompareSortKey(keyA,keyB);
  2566. end;
  2567. function IncrementalCompareString_Shift(
  2568. const AStrA : PUnicodeChar;
  2569. const ALengthA : SizeInt;
  2570. const AStrB : PUnicodeChar;
  2571. const ALengthB : SizeInt;
  2572. const ACollation : PUCA_DataBook
  2573. ) : Integer;
  2574. var
  2575. ctxA, ctxB : TComputeKeyContext;
  2576. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  2577. keyIndexB : Integer;
  2578. keyA, keyB : TUCASortKey;
  2579. begin
  2580. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  2581. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  2582. (ALengthA = ALengthB)
  2583. )
  2584. then
  2585. exit(0);
  2586. if (ALengthA = 0) then
  2587. exit(-1);
  2588. if (ALengthB = 0) then
  2589. exit(1);
  2590. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  2591. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  2592. lastKeyIndexA := -1;
  2593. keyIndexA := -1;
  2594. lengthMaxA := 0;
  2595. keyIndexB := -1;
  2596. while True do begin
  2597. if not ComputeRawSortKeyNextItem(@ctxA) then
  2598. Break;
  2599. if (ctxA.ral = lengthMaxA) then
  2600. Continue;
  2601. lengthMaxA := ctxA.ral;
  2602. keyIndexA := lastKeyIndexA + 1;
  2603. while (keyIndexA < lengthMaxA) and
  2604. ( (ctxA.r[keyIndexA].Weights[0] = 0) or
  2605. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  2606. )
  2607. do begin
  2608. Inc(keyIndexA);
  2609. end;
  2610. if (keyIndexA = lengthMaxA) then begin
  2611. lastKeyIndexA := keyIndexA-1;
  2612. Continue;
  2613. end;
  2614. while (keyIndexA < lengthMaxA) do begin
  2615. if (ctxA.r[keyIndexA].Weights[0] = 0) or
  2616. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  2617. then begin
  2618. Inc(keyIndexA);
  2619. Continue;
  2620. end;
  2621. Inc(keyIndexB);
  2622. while (ctxB.ral <= keyIndexB) or
  2623. (ctxB.r[keyIndexB].Weights[0] = 0) or
  2624. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  2625. do begin
  2626. if (ctxB.ral <= keyIndexB) then begin
  2627. if not ComputeRawSortKeyNextItem(@ctxB) then
  2628. Break;
  2629. Continue;
  2630. end;
  2631. Inc(keyIndexB);
  2632. end;
  2633. if (ctxB.ral <= keyIndexB) then
  2634. exit(1);
  2635. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  2636. exit(1);
  2637. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  2638. exit(-1);
  2639. Inc(keyIndexA);
  2640. end;
  2641. lastKeyIndexA := keyIndexA - 1;
  2642. end;
  2643. //Key(A) is completed !
  2644. Inc(keyIndexB);
  2645. while (ctxB.ral <= keyIndexB) or
  2646. (ctxB.r[keyIndexB].Weights[0] = 0) or
  2647. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  2648. do begin
  2649. if (ctxB.ral <= keyIndexB) then begin
  2650. if not ComputeRawSortKeyNextItem(@ctxB) then
  2651. Break;
  2652. Continue;
  2653. end;
  2654. Inc(keyIndexB);
  2655. end;
  2656. if (ctxB.ral > keyIndexB) then begin
  2657. //B has at least one more primary weight that A
  2658. exit(-1);
  2659. end;
  2660. while ComputeRawSortKeyNextItem(@ctxB) do begin
  2661. //
  2662. end;
  2663. //Key(B) is completed !
  2664. keyA := FormKey(ctxA.r,ctxA.Collation);
  2665. keyB := FormKey(ctxB.r,ctxB.Collation);
  2666. Result := CompareSortKey(keyA,keyB);
  2667. end;
  2668. function IncrementalCompareString(
  2669. const AStrA : PUnicodeChar;
  2670. const ALengthA : SizeInt;
  2671. const AStrB : PUnicodeChar;
  2672. const ALengthB : SizeInt;
  2673. const ACollation : PUCA_DataBook
  2674. ) : Integer;
  2675. begin
  2676. case ACollation^.VariableWeight of
  2677. TUCA_VariableKind.ucaNonIgnorable :
  2678. begin
  2679. Result := IncrementalCompareString_NonIgnorable(
  2680. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2681. );
  2682. end;
  2683. TUCA_VariableKind.ucaBlanked,
  2684. TUCA_VariableKind.ucaShiftedTrimmed,
  2685. TUCA_VariableKind.ucaIgnoreSP,
  2686. TUCA_VariableKind.ucaShifted:
  2687. begin
  2688. Result := IncrementalCompareString_Shift(
  2689. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2690. );
  2691. end;
  2692. else
  2693. begin
  2694. Result := IncrementalCompareString_Shift(
  2695. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2696. );
  2697. end;
  2698. end;
  2699. end;
  2700. function IncrementalCompareString(
  2701. const AStrA,
  2702. AStrB : UnicodeString;
  2703. const ACollation : PUCA_DataBook
  2704. ) : Integer;
  2705. begin
  2706. Result := IncrementalCompareString(
  2707. Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
  2708. ACollation
  2709. );
  2710. end;
  2711. function ComputeRawSortKeyNextItem(
  2712. const AContext : PComputeKeyContext
  2713. ) : Boolean;
  2714. var
  2715. ctx : PComputeKeyContext;
  2716. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  2717. begin
  2718. if (ctx^.rl < AMinGrow) then
  2719. ctx^.rl := ctx^.rl + AMinGrow
  2720. else
  2721. ctx^.rl := 2 * ctx^.rl;
  2722. SetLength(ctx^.r,ctx^.rl);
  2723. end;
  2724. procedure SaveKeyOwner();
  2725. var
  2726. k : Integer;
  2727. kppLevel : Byte;
  2728. begin
  2729. k := 0;
  2730. kppLevel := High(Byte);
  2731. while (k <= ctx^.locHistoryTop) do begin
  2732. if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
  2733. ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
  2734. kppLevel := ctx^.locHistory[k].ppLevel;
  2735. end;
  2736. k := k + 1;
  2737. end;
  2738. if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
  2739. ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
  2740. k := k + 1;
  2741. end;
  2742. ctx^.LastKeyOwner.Length := k;
  2743. end;
  2744. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  2745. begin
  2746. SaveKeyOwner();
  2747. if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
  2748. GrowKey(AItem^.WeightLength);
  2749. AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
  2750. ctx^.ral := ctx^.ral + AItem^.WeightLength;
  2751. end;
  2752. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  2753. begin
  2754. if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
  2755. GrowKey(AItem^.WeightCount);
  2756. Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
  2757. ctx^.ral := ctx^.ral + AItem^.WeightCount;
  2758. end;
  2759. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  2760. begin
  2761. SaveKeyOwner();
  2762. if ((ctx^.ral + 2) > ctx^.rl) then
  2763. GrowKey();
  2764. DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
  2765. ctx^.ral := ctx^.ral + 2;
  2766. end;
  2767. procedure RecordDeletion();inline;
  2768. begin
  2769. if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  2770. if (ctx^.suppressState.cl = nil) or
  2771. (ctx^.suppressState.CharCount > ctx^.ppLevel)
  2772. then begin
  2773. ctx^.suppressState.cl := ctx^.cl;
  2774. ctx^.suppressState.CharCount := ctx^.ppLevel;
  2775. end;
  2776. end;
  2777. end;
  2778. procedure RecordStep();inline;
  2779. begin
  2780. Inc(ctx^.locHistoryTop);
  2781. ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
  2782. ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
  2783. ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
  2784. ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
  2785. ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
  2786. ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
  2787. RecordDeletion();
  2788. end;
  2789. procedure ClearHistory();inline;
  2790. begin
  2791. ctx^.locHistoryTop := -1;
  2792. end;
  2793. function HasHistory() : Boolean;inline;
  2794. begin
  2795. Result := (ctx^.locHistoryTop >= 0);
  2796. end;
  2797. function GetHistoryLength() : Integer;inline;
  2798. begin
  2799. Result := (ctx^.locHistoryTop + 1);
  2800. end;
  2801. procedure GoBack();inline;
  2802. begin
  2803. Assert(ctx^.locHistoryTop >= 0);
  2804. ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
  2805. ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
  2806. ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
  2807. ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
  2808. ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
  2809. ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
  2810. ctx^.ps := @ctx^.s[ctx^.i];
  2811. Dec(ctx^.locHistoryTop);
  2812. end;
  2813. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  2814. var
  2815. k : DWord;
  2816. pk : PUnicodeChar;
  2817. puk : PUC_Prop;
  2818. begin
  2819. k := AStartFrom;
  2820. if (k > ctx^.c) then
  2821. exit(False);
  2822. if (ctx^.removedCharIndexLength>0) and
  2823. (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,k) >= 0)
  2824. then begin
  2825. exit(False);
  2826. end;
  2827. {if (k = (i+1)) or
  2828. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  2829. then
  2830. lastUnblockedNonstarterCCC := 0;}
  2831. pk := @ctx^.s[k];
  2832. if UnicodeIsHighSurrogate(pk^) then begin
  2833. if (k = ctx^.c) then
  2834. exit(False);
  2835. if UnicodeIsLowSurrogate(pk[1]) then
  2836. puk := GetProps(pk[0],pk[1])
  2837. else
  2838. puk := GetProps(Word(pk^));
  2839. end else begin
  2840. puk := GetProps(Word(pk^));
  2841. end;
  2842. if (puk^.CCC = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.CCC) then
  2843. exit(False);
  2844. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  2845. Result := True;
  2846. end;
  2847. procedure RemoveChar(APos : Integer);inline;
  2848. begin
  2849. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  2850. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  2851. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
  2852. Inc(ctx^.removedCharIndexLength);
  2853. if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
  2854. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  2855. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  2856. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
  2857. Inc(ctx^.removedCharIndexLength);
  2858. end;
  2859. end;
  2860. procedure Inc_I();inline;
  2861. begin
  2862. if (ctx^.removedCharIndexLength = 0) then begin
  2863. Inc(ctx^.i);
  2864. Inc(ctx^.ps);
  2865. exit;
  2866. end;
  2867. while True do begin
  2868. Inc(ctx^.i);
  2869. Inc(ctx^.ps);
  2870. if (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,ctx^.i) = -1) then
  2871. Break;
  2872. end;
  2873. end;
  2874. function MoveToNextChar() : Boolean;inline;
  2875. begin
  2876. Result := True;
  2877. if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
  2878. if (ctx^.i = ctx^.c) then
  2879. exit(False);
  2880. if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  2881. ctx^.surrogateState := True;
  2882. ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
  2883. end else begin
  2884. ctx^.surrogateState := False;
  2885. ctx^.cp := Word(ctx^.ps[0]);
  2886. end;
  2887. end else begin
  2888. ctx^.surrogateState := False;
  2889. ctx^.cp := Word(ctx^.ps[0]);
  2890. end;
  2891. end;
  2892. function FindPropUCA() : Boolean;
  2893. var
  2894. candidateCL : PUCA_DataBook;
  2895. begin
  2896. ctx^.pp := nil;
  2897. if (ctx^.cl = nil) then
  2898. candidateCL := ctx^.Collation
  2899. else
  2900. candidateCL := ctx^.cl;
  2901. if ctx^.surrogateState then begin
  2902. while (candidateCL <> nil) do begin
  2903. ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
  2904. if (ctx^.pp <> nil) then
  2905. break;
  2906. candidateCL := candidateCL^.Base;
  2907. end;
  2908. end else begin
  2909. while (candidateCL <> nil) do begin
  2910. ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
  2911. if (ctx^.pp <> nil) then
  2912. break;
  2913. candidateCL := candidateCL^.Base;
  2914. end;
  2915. end;
  2916. ctx^.cl := candidateCL;
  2917. Result := (ctx^.pp <> nil);
  2918. end;
  2919. procedure AddWeightsAndClear();inline;
  2920. var
  2921. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2922. begin
  2923. if (ctx^.pp^.WeightLength > 0) then begin
  2924. AddWeights(ctx^.pp);
  2925. end else
  2926. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2927. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  2928. (ctxNode^.Data.WeightCount > 0)
  2929. then begin
  2930. AddContextWeights(@ctxNode^.Data);
  2931. end;
  2932. //AddWeights(pp);
  2933. ClearHistory();
  2934. ClearPP(ctx);
  2935. end;
  2936. function StartMatch() : Boolean;
  2937. procedure HandleLastChar();
  2938. var
  2939. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2940. begin
  2941. while True do begin
  2942. if ctx^.pp^.IsValid() then begin
  2943. if (ctx^.pp^.WeightLength > 0) then
  2944. AddWeights(ctx^.pp)
  2945. else
  2946. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2947. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  2948. (ctxNode^.Data.WeightCount > 0)
  2949. then
  2950. AddContextWeights(@ctxNode^.Data)
  2951. else
  2952. AddComputedWeights(ctx^.cp){handle deletion of code point};
  2953. break;
  2954. end;
  2955. if (ctx^.cl^.Base = nil) then begin
  2956. AddComputedWeights(ctx^.cp);
  2957. break;
  2958. end;
  2959. ctx^.cl := ctx^.cl^.Base;
  2960. if not FindPropUCA() then begin
  2961. AddComputedWeights(ctx^.cp);
  2962. break;
  2963. end;
  2964. end;
  2965. end;
  2966. var
  2967. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2968. begin
  2969. Result := False;
  2970. ctx^.ppLevel := 0;
  2971. if not FindPropUCA() then begin
  2972. AddComputedWeights(ctx^.cp);
  2973. ClearHistory();
  2974. ClearPP(ctx);
  2975. Result := True;
  2976. end else begin
  2977. if (ctx^.i = ctx^.c) then begin
  2978. HandleLastChar();
  2979. Result := True;
  2980. end else begin
  2981. if ctx^.pp^.IsValid()then begin
  2982. if (ctx^.pp^.ChildCount = 0) then begin
  2983. if (ctx^.pp^.WeightLength > 0) then
  2984. AddWeights(ctx^.pp)
  2985. else
  2986. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2987. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
  2988. (tmpCtxNode^.Data.WeightCount > 0)
  2989. then
  2990. AddContextWeights(@tmpCtxNode^.Data)
  2991. else
  2992. AddComputedWeights(ctx^.cp){handle deletion of code point};
  2993. ClearPP(ctx);
  2994. ClearHistory();
  2995. Result := True;
  2996. end else begin
  2997. RecordStep();
  2998. end
  2999. end else begin
  3000. if (ctx^.pp^.ChildCount = 0) then begin
  3001. AddComputedWeights(ctx^.cp);
  3002. ClearPP(ctx);
  3003. ClearHistory();
  3004. Result := True;
  3005. end else begin
  3006. RecordStep();
  3007. end;
  3008. end;
  3009. end;
  3010. end;
  3011. end;
  3012. function TryPermutation() : Boolean;
  3013. var
  3014. kk : Integer;
  3015. b : Boolean;
  3016. puk : PUC_Prop;
  3017. ppk : PUCA_PropItemRec;
  3018. begin
  3019. Result := False;
  3020. puk := GetProps(ctx^.cp);
  3021. if (puk^.CCC = 0) then
  3022. exit;
  3023. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  3024. if ctx^.surrogateState then
  3025. kk := ctx^.i + 2
  3026. else
  3027. kk := ctx^.i + 1;
  3028. while IsUnblockedNonstarter(kk) do begin
  3029. b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
  3030. if b then
  3031. ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
  3032. else
  3033. ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
  3034. if (ppk <> nil) then begin
  3035. ctx^.pp := ppk;
  3036. RemoveChar(kk);
  3037. Inc(ctx^.ppLevel);
  3038. RecordStep();
  3039. Result := True;
  3040. if (ctx^.pp^.ChildCount = 0 ) then
  3041. Break;
  3042. end;
  3043. if b then
  3044. Inc(kk);
  3045. Inc(kk);
  3046. end;
  3047. end;
  3048. procedure AdvanceCharPos();inline;
  3049. begin
  3050. if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  3051. Inc(ctx^.i);
  3052. Inc(ctx^.ps);
  3053. end;
  3054. Inc_I();
  3055. end;
  3056. var
  3057. ok : Boolean;
  3058. pp1 : PUCA_PropItemRec;
  3059. cltemp : PUCA_DataBook;
  3060. ctxNode : PUCA_PropItemContextTreeNodeRec;
  3061. begin
  3062. if AContext^.Finished then
  3063. exit(False);
  3064. ctx := AContext;
  3065. while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
  3066. ok := False;
  3067. if (ctx^.pp = nil) then begin // Start Matching
  3068. ok := StartMatch();
  3069. end else begin
  3070. pp1 := FindChild(ctx^.cp,ctx^.pp);
  3071. if (pp1 <> nil) then begin
  3072. Inc(ctx^.ppLevel);
  3073. ctx^.pp := pp1;
  3074. if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
  3075. ok := False;
  3076. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3077. if (ctx^.pp^.WeightLength > 0) then begin
  3078. AddWeightsAndClear();
  3079. ok := True;
  3080. end else
  3081. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3082. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3083. (ctxNode^.Data.WeightCount > 0)
  3084. then begin
  3085. AddContextWeights(@ctxNode^.Data);
  3086. ClearHistory();
  3087. ClearPP(ctx);
  3088. ok := True;
  3089. end
  3090. end;
  3091. if not ok then begin
  3092. RecordDeletion();
  3093. while HasHistory() do begin
  3094. GoBack();
  3095. if ctx^.pp^.IsValid() and
  3096. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3097. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3098. )
  3099. then begin
  3100. AddWeightsAndClear();
  3101. ok := True;
  3102. Break;
  3103. end;
  3104. end;
  3105. if not ok then begin
  3106. cltemp := ctx^.cl^.Base;
  3107. if (cltemp <> nil) then begin
  3108. ClearPP(ctx,False);
  3109. ctx^.cl := cltemp;
  3110. Continue;
  3111. end;
  3112. end;
  3113. if not ok then begin
  3114. AddComputedWeights(ctx^.cp);
  3115. ClearHistory();
  3116. ClearPP(ctx);
  3117. ok := True;
  3118. end;
  3119. end;
  3120. end else begin
  3121. RecordStep();
  3122. end;
  3123. end else begin
  3124. // permutations !
  3125. ok := False;
  3126. if TryPermutation() and ctx^.pp^.IsValid() then begin
  3127. if (ctx^.suppressState.CharCount = 0) then begin
  3128. AddWeightsAndClear();
  3129. ok := True;
  3130. exit(True);// Continue;
  3131. end;
  3132. while True do begin
  3133. if ctx^.pp^.IsValid() and
  3134. (ctx^.pp^.WeightLength > 0) and
  3135. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3136. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3137. )
  3138. then begin
  3139. AddWeightsAndClear();
  3140. ok := True;
  3141. break;
  3142. end;
  3143. if not HasHistory() then
  3144. break;
  3145. GoBack();
  3146. if (ctx^.pp = nil) then
  3147. break;
  3148. end;
  3149. end;
  3150. if not ok then begin
  3151. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3152. if (ctx^.pp^.WeightLength > 0) then begin
  3153. AddWeightsAndClear();
  3154. ok := True;
  3155. end else
  3156. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3157. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3158. (ctxNode^.Data.WeightCount > 0)
  3159. then begin
  3160. AddContextWeights(@ctxNode^.Data);
  3161. ClearHistory();
  3162. ClearPP(ctx);
  3163. ok := True;
  3164. end
  3165. end;
  3166. if ok then
  3167. exit(True);// Continue;
  3168. end;
  3169. if not ok then begin
  3170. if (ctx^.cl^.Base <> nil) then begin
  3171. cltemp := ctx^.cl^.Base;
  3172. while HasHistory() do
  3173. GoBack();
  3174. ctx^.pp := nil;
  3175. ctx^.ppLevel := 0;
  3176. ctx^.cl := cltemp;
  3177. Continue;
  3178. end;
  3179. //walk back
  3180. ok := False;
  3181. while HasHistory() do begin
  3182. GoBack();
  3183. if ctx^.pp^.IsValid() and
  3184. (ctx^.pp^.WeightLength > 0) and
  3185. ( (ctx^.suppressState.CharCount = 0) or
  3186. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3187. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3188. )
  3189. )
  3190. then begin
  3191. AddWeightsAndClear();
  3192. ok := True;
  3193. Break;
  3194. end;
  3195. end;
  3196. if ok then begin
  3197. AdvanceCharPos();
  3198. exit(True);// Continue;
  3199. end;
  3200. if (ctx^.pp <> nil) then begin
  3201. AddComputedWeights(ctx^.cp);
  3202. ClearHistory();
  3203. ClearPP(ctx);
  3204. ok := True;
  3205. end;
  3206. end;
  3207. end;
  3208. end;
  3209. if ctx^.surrogateState then begin
  3210. Inc(ctx^.ps);
  3211. Inc(ctx^.i);
  3212. end;
  3213. //
  3214. Inc_I();
  3215. if ok then
  3216. exit(True);
  3217. end;
  3218. SetLength(ctx^.r,ctx^.ral);
  3219. ctx^.Finished := True;
  3220. Result := True;
  3221. end;
  3222. function ComputeSortKey(
  3223. const AStr : PUnicodeChar;
  3224. const ALength : SizeInt;
  3225. const ACollation : PUCA_DataBook
  3226. ) : TUCASortKey;
  3227. var
  3228. r : TUCA_PropWeightsArray;
  3229. begin
  3230. r := ComputeRawSortKey(AStr,ALength,ACollation);
  3231. Result := FormKey(r,ACollation);
  3232. end;
  3233. end.