unicodedata.pas 98 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. -------------------------------------------------------------------------------
  16. Overview of the Unicode Collation Algorithm(UCA) data layout :
  17. ============================================================
  18. The UCA data(see “TUCA_DataBook”) are organized into index data
  19. (see the “TUCA_DataBook” fields “BMP_Table1”, “BMP_Table2”,
  20. “OBMP_Table1” and “OBMP_Table2”) and actual properties data(see
  21. the “Props” field of “TUCA_DataBook”). The index is a 3 level
  22. tables designed to minimize the overhaul data size. The
  23. properties’ data contain the actual (used) UCA’s properties
  24. for the customized code points(or sequence of code points)
  25. data (see TUCA_PropItemRec).
  26. To get the properties’ record of a code point, one goes
  27. through the index data to get its offset into the “Props”
  28. serialized data, see the “GetPropUCA” procedure.
  29. The “TUCA_PropItemRec” record, that represents the actual
  30. properties, contains a fixed part and a variable part. The
  31. fixed part is directly expressed as fields of the record :
  32. “WeightLength”, “ChildCount”, “Size”, “Flags”. The
  33. variable part depends on some values of the fixed part; For
  34. example “WeightLength” specify the number of weight[1] item,
  35. it can be zero or not null; The “Flags” fields does contains
  36. some bit states to indicate for example if the record’s owner,
  37. that is the target code point, is present(it is not always
  38. necessary to store the code point as you are required to have
  39. this information in the first place in order to get the
  40. “TUCA_PropItemRec” record).
  41. The data, as it is organized now, is as follow for each code point :
  42. * the fixed part is serialized,
  43. * if there are weight item array, they are serialized
  44. (see the "WeigthLength")
  45. * the code point is serialized (if needed)
  46. * the context[2] array is serialized
  47. * The children[3] record are serialized.
  48. The “Size” represent the size of the whole record, including its
  49. children records(see [3]). The “GetSelfOnlySize” returns the size
  50. of the queried record, excluding the size of its children.
  51. Notes :
  52. [1] : A weight item is an array of 3 words. A code point/sequence of code
  53. point may have zero or multiple items.
  54. [2] : There are characters(mostly japanese ones) that do not have their
  55. own weighs; There inherit the weights of the preceding character
  56. in the string that you will be evaluating.
  57. [3] : Some unicode characters are expressed using more than one code point.
  58. In that case the properties records are serialized as a trie. The
  59. trie data structure is useful when many characters’ expression have
  60. the same starting code point(s).
  61. [4] TUCA_PropItemRec serialization :
  62. TUCA_PropItemRec :
  63. WeightLength, ChildCount, Size, Flags [weight item array]
  64. [Code Point] [Context data]
  65. [Child 0] [Child 1] .. [Child n]
  66. each [Child k] is a TUCA_PropItemRec.
  67. }
  68. unit unicodedata;
  69. {$mode delphi}
  70. {$H+}
  71. {$PACKENUM 1}
  72. {$SCOPEDENUMS ON}
  73. {$pointermath on}
  74. {$define USE_INLINE}
  75. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  76. { $define uni_debug}
  77. interface
  78. const
  79. MAX_WORD = High(Word);
  80. LOW_SURROGATE_BEGIN = Word($DC00);
  81. LOW_SURROGATE_END = Word($DFFF);
  82. HIGH_SURROGATE_BEGIN = Word($D800);
  83. HIGH_SURROGATE_END = Word($DBFF);
  84. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  85. UCS4_HALF_BASE = LongWord($10000);
  86. UCS4_HALF_MASK = Word($3FF);
  87. MAX_LEGAL_UTF32 = $10FFFF;
  88. const
  89. // Unicode General Category
  90. UGC_UppercaseLetter = 0;
  91. UGC_LowercaseLetter = 1;
  92. UGC_TitlecaseLetter = 2;
  93. UGC_ModifierLetter = 3;
  94. UGC_OtherLetter = 4;
  95. UGC_NonSpacingMark = 5;
  96. UGC_CombiningMark = 6;
  97. UGC_EnclosingMark = 7;
  98. UGC_DecimalNumber = 8;
  99. UGC_LetterNumber = 9;
  100. UGC_OtherNumber = 10;
  101. UGC_ConnectPunctuation = 11;
  102. UGC_DashPunctuation = 12;
  103. UGC_OpenPunctuation = 13;
  104. UGC_ClosePunctuation = 14;
  105. UGC_InitialPunctuation = 15;
  106. UGC_FinalPunctuation = 16;
  107. UGC_OtherPunctuation = 17;
  108. UGC_MathSymbol = 18;
  109. UGC_CurrencySymbol = 19;
  110. UGC_ModifierSymbol = 20;
  111. UGC_OtherSymbol = 21;
  112. UGC_SpaceSeparator = 22;
  113. UGC_LineSeparator = 23;
  114. UGC_ParagraphSeparator = 24;
  115. UGC_Control = 25;
  116. UGC_Format = 26;
  117. UGC_Surrogate = 27;
  118. UGC_PrivateUse = 28;
  119. UGC_Unassigned = 29;
  120. type
  121. TUInt24Rec = packed record
  122. public
  123. {$ifdef FPC_LITTLE_ENDIAN}
  124. byte0, byte1, byte2 : Byte;
  125. {$else FPC_LITTLE_ENDIAN}
  126. byte2, byte1, byte0 : Byte;
  127. {$endif FPC_LITTLE_ENDIAN}
  128. public
  129. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  130. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  131. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  132. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  133. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  134. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  135. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  136. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  137. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  138. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  139. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  140. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  141. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  142. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  143. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  144. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  145. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  146. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  147. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  148. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  149. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  150. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  151. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  152. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  153. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  154. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  155. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  156. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  157. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  158. end;
  159. UInt24 = TUInt24Rec;
  160. PUInt24 = ^UInt24;
  161. const
  162. ZERO_UINT24 : UInt24 =
  163. {$ifdef FPC_LITTLE_ENDIAN}
  164. (byte0 : 0; byte1 : 0; byte2 : 0;);
  165. {$else FPC_LITTLE_ENDIAN}
  166. (byte2 : 0; byte1 : 0; byte0 : 0;);
  167. {$endif FPC_LITTLE_ENDIAN}
  168. type
  169. PUC_Prop = ^TUC_Prop;
  170. { TUC_Prop }
  171. { On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
  172. This breaks intended layout of initialized constants/variables.
  173. A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
  174. but for now just declare this record as "unpacked". This causes bloat, but it's better than having
  175. entire unit not working at all. }
  176. TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
  177. private
  178. function GetCategory : Byte;inline;
  179. procedure SetCategory(AValue : Byte);
  180. function GetWhiteSpace : Boolean;inline;
  181. procedure SetWhiteSpace(AValue : Boolean);
  182. function GetHangulSyllable : Boolean;inline;
  183. procedure SetHangulSyllable(AValue : Boolean);
  184. function GetNumericValue: Double;inline;
  185. public
  186. CategoryData : Byte;
  187. public
  188. CCC : Byte;
  189. NumericIndex : Byte;
  190. SimpleUpperCase : UInt24;
  191. SimpleLowerCase : UInt24;
  192. DecompositionID : SmallInt;
  193. public
  194. property Category : Byte read GetCategory write SetCategory;
  195. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  196. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  197. property NumericValue : Double read GetNumericValue;
  198. end;
  199. type
  200. TUCA_PropWeights = packed record
  201. Weights : array[0..2] of Word;
  202. end;
  203. PUCA_PropWeights = ^TUCA_PropWeights;
  204. TUCA_PropItemContextRec = packed record
  205. public
  206. CodePointCount : Byte;
  207. WeightCount : Byte;
  208. public
  209. function GetCodePoints() : PUInt24;inline;
  210. function GetWeights() : PUCA_PropWeights;inline;
  211. end;
  212. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  213. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  214. TUCA_PropItemContextTreeNodeRec = packed record
  215. public
  216. Left : Word;
  217. Right : Word;
  218. Data : TUCA_PropItemContextRec;
  219. public
  220. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  221. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  222. end;
  223. { TUCA_PropItemContextTreeRec }
  224. TUCA_PropItemContextTreeRec = packed record
  225. public
  226. Size : UInt24;
  227. public
  228. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  229. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  230. function Find(
  231. const AChars : PUInt24;
  232. const ACharCount : Integer;
  233. out ANode : PUCA_PropItemContextTreeNodeRec
  234. ) : Boolean;
  235. end;
  236. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  237. { TUCA_PropItemRec }
  238. TUCA_PropItemRec = packed record
  239. private
  240. const FLAG_VALID = 0;
  241. const FLAG_CODEPOINT = 1;
  242. const FLAG_CONTEXTUAL = 2;
  243. const FLAG_DELETION = 3;
  244. const FLAG_COMPRESS_WEIGHT_1 = 6;
  245. const FLAG_COMPRESS_WEIGHT_2 = 7;
  246. private
  247. function GetCodePoint() : UInt24;inline;
  248. public
  249. WeightLength : Byte;
  250. ChildCount : Byte;
  251. Size : Word;
  252. Flags : Byte;
  253. public
  254. function HasCodePoint() : Boolean;inline;
  255. property CodePoint : UInt24 read GetCodePoint;
  256. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  257. function IsValid() : Boolean;inline;
  258. //function GetWeightArray() : PUCA_PropWeights;inline;
  259. procedure GetWeightArray(ADest : PUCA_PropWeights);
  260. function GetSelfOnlySize() : Cardinal;inline;
  261. function GetContextual() : Boolean;inline;
  262. property Contextual : Boolean read GetContextual;
  263. function GetContext() : PUCA_PropItemContextTreeRec;
  264. function IsDeleted() : Boolean;inline;
  265. function IsWeightCompress_1() : Boolean;inline;
  266. function IsWeightCompress_2() : Boolean;inline;
  267. end;
  268. PUCA_PropItemRec = ^TUCA_PropItemRec;
  269. TUCA_VariableKind = (
  270. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  271. ucaIgnoreSP // This one is not implemented !
  272. );
  273. TCollationName = string[128];
  274. PUCA_DataBook = ^TUCA_DataBook;
  275. TUCA_DataBook = record
  276. public
  277. Base : PUCA_DataBook;
  278. Version : TCollationName;
  279. CollationName : TCollationName;
  280. VariableWeight : TUCA_VariableKind;
  281. Backwards : array[0..3] of Boolean;
  282. BMP_Table1 : PByte;
  283. BMP_Table2 : PUInt24;
  284. OBMP_Table1 : PWord;
  285. OBMP_Table2 : PUInt24;
  286. PropCount : Integer;
  287. Props : PUCA_PropItemRec;
  288. VariableLowLimit : Word;
  289. VariableHighLimit : Word;
  290. Dynamic : Boolean;
  291. public
  292. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  293. end;
  294. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  295. TCollationFields = set of TCollationField;
  296. const
  297. ROOT_COLLATION_NAME = 'DUCET';
  298. ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
  299. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  300. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  301. function UnicodeIsSurrogatePair(
  302. const AHighSurrogate,
  303. ALowSurrogate : UnicodeChar
  304. ) : Boolean;inline;
  305. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  306. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  307. function UnicodeToUpper(
  308. const AString : UnicodeString;
  309. const AIgnoreInvalidSequence : Boolean;
  310. out AResultString : UnicodeString
  311. ) : Integer;
  312. function UnicodeToLower(
  313. const AString : UnicodeString;
  314. const AIgnoreInvalidSequence : Boolean;
  315. out AResultString : UnicodeString
  316. ) : Integer;
  317. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  318. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  319. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  320. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
  321. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
  322. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  323. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  324. function DecomposeCanonical(const AString : UnicodeString) : UnicodeString;inline;overload;
  325. function DecomposeCanonical(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  326. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  327. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  328. type
  329. TUCASortKeyItem = Word;
  330. TUCASortKey = array of TUCASortKeyItem;
  331. function ComputeSortKey(
  332. const AString : UnicodeString;
  333. const ACollation : PUCA_DataBook
  334. ) : TUCASortKey;inline;overload;
  335. function ComputeSortKey(
  336. const AStr : PUnicodeChar;
  337. const ALength : SizeInt;
  338. const ACollation : PUCA_DataBook
  339. ) : TUCASortKey;overload;
  340. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  341. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  342. function IncrementalCompareString(
  343. const AStrA : PUnicodeChar;
  344. const ALengthA : SizeInt;
  345. const AStrB : PUnicodeChar;
  346. const ALengthB : SizeInt;
  347. const ACollation : PUCA_DataBook
  348. ) : Integer;overload;
  349. function IncrementalCompareString(
  350. const AStrA,
  351. AStrB : UnicodeString;
  352. const ACollation : PUCA_DataBook
  353. ) : Integer;inline;overload;
  354. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
  355. function RegisterCollation(
  356. const ADirectory,
  357. ALanguage : string
  358. ) : Boolean;overload;
  359. function UnregisterCollation(const AName : ansistring): Boolean;
  360. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  361. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  362. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  363. function GetCollationCount() : Integer;
  364. procedure PrepareCollation(
  365. ACollation : PUCA_DataBook;
  366. const ABaseName : ansistring;
  367. const AChangedFields : TCollationFields
  368. );
  369. function LoadCollation(
  370. const AData : Pointer;
  371. const ADataLength : Integer
  372. ) : PUCA_DataBook;overload;
  373. function LoadCollation(const AFileName : string) : PUCA_DataBook;overload;
  374. function LoadCollation(
  375. const ADirectory,
  376. ALanguage : string
  377. ) : PUCA_DataBook;overload;
  378. procedure FreeCollation(AItem : PUCA_DataBook);
  379. type
  380. TEndianKind = (Little, Big);
  381. const
  382. ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
  383. {$IFDEF ENDIAN_LITTLE}
  384. ENDIAN_NATIVE = TEndianKind.Little;
  385. ENDIAN_NON_NATIVE = TEndianKind.Big;
  386. {$ENDIF ENDIAN_LITTLE}
  387. {$IFDEF ENDIAN_BIG}
  388. ENDIAN_NATIVE = TEndianKind.Big;
  389. ENDIAN_NON_NATIVE = TEndianKind.Little;
  390. {$ENDIF ENDIAN_BIG}
  391. resourcestring
  392. SCollationNotFound = 'Collation not found : "%s".';
  393. implementation
  394. uses
  395. unicodenumtable;
  396. type
  397. TCardinalRec = packed record
  398. {$ifdef FPC_LITTLE_ENDIAN}
  399. byte0, byte1, byte2, byte3 : Byte;
  400. {$else FPC_LITTLE_ENDIAN}
  401. byte3, byte2, byte1, byte0 : Byte;
  402. {$endif FPC_LITTLE_ENDIAN}
  403. end;
  404. TWordRec = packed record
  405. {$ifdef FPC_LITTLE_ENDIAN}
  406. byte0, byte1 : Byte;
  407. {$else FPC_LITTLE_ENDIAN}
  408. byte1, byte0 : Byte;
  409. {$endif FPC_LITTLE_ENDIAN}
  410. end;
  411. { TUInt24Rec }
  412. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  413. begin
  414. TCardinalRec(Result).byte0 := a.byte0;
  415. TCardinalRec(Result).byte1 := a.byte1;
  416. TCardinalRec(Result).byte2 := a.byte2;
  417. TCardinalRec(Result).byte3 := 0;
  418. end;
  419. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  420. begin
  421. Result := Cardinal(a);
  422. end;
  423. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  424. begin
  425. {$IFOPT R+}
  426. if (a > $FFFF) then
  427. Error(reIntOverflow);
  428. {$ENDIF R+}
  429. TWordRec(Result).byte0 := a.byte0;
  430. TWordRec(Result).byte1 := a.byte1;
  431. end;
  432. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  433. begin
  434. {$IFOPT R+}
  435. if (a > $FF) then
  436. Error(reIntOverflow);
  437. {$ENDIF R+}
  438. Result := a.byte0;
  439. end;
  440. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  441. begin
  442. {$IFOPT R+}
  443. if (a > $FFFFFF) then
  444. Error(reIntOverflow);
  445. {$ENDIF R+}
  446. Result.byte0 := TCardinalRec(a).byte0;
  447. Result.byte1 := TCardinalRec(a).byte1;
  448. Result.byte2 := TCardinalRec(a).byte2;
  449. end;
  450. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  451. begin
  452. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  453. end;
  454. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  455. begin
  456. Result := (TCardinalRec(b).byte3 = 0) and
  457. (a.byte0 = TCardinalRec(b).byte0) and
  458. (a.byte1 = TCardinalRec(b).byte1) and
  459. (a.byte2 = TCardinalRec(b).byte2);
  460. end;
  461. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  462. begin
  463. Result := (b = a);
  464. end;
  465. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  466. begin
  467. Result := (LongInt(a) = b);
  468. end;
  469. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  470. begin
  471. Result := (b = a);
  472. end;
  473. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  474. begin
  475. Result := (a.byte2 = 0) and
  476. (a.byte0 = TWordRec(b).byte0) and
  477. (a.byte1 = TWordRec(b).byte1);
  478. end;
  479. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  480. begin
  481. Result := (b = a);
  482. end;
  483. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  484. begin
  485. Result := (a.byte2 = 0) and
  486. (a.byte1 = 0) and
  487. (a.byte0 = b);
  488. end;
  489. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  490. begin
  491. Result := (b = a);
  492. end;
  493. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  494. begin
  495. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  496. end;
  497. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  498. begin
  499. Result := (TCardinalRec(b).byte3 <> 0) or
  500. (a.byte0 <> TCardinalRec(b).byte0) or
  501. (a.byte1 <> TCardinalRec(b).byte1) or
  502. (a.byte2 <> TCardinalRec(b).byte2);
  503. end;
  504. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  505. begin
  506. Result := (b <> a);
  507. end;
  508. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  509. begin
  510. Result := (a.byte2 > b.byte2) or
  511. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  512. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  513. end;
  514. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  515. begin
  516. Result := Cardinal(a) > b;
  517. end;
  518. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  519. begin
  520. Result := a > Cardinal(b);
  521. end;
  522. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  523. begin
  524. Result := (a.byte2 > b.byte2) or
  525. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  526. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  527. end;
  528. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  529. begin
  530. Result := Cardinal(a) >= b;
  531. end;
  532. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  533. begin
  534. Result := a >= Cardinal(b);
  535. end;
  536. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  537. begin
  538. Result := (b > a);
  539. end;
  540. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  541. begin
  542. Result := Cardinal(a) < b;
  543. end;
  544. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  545. begin
  546. Result := a < Cardinal(b);
  547. end;
  548. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  549. begin
  550. Result := (b >= a);
  551. end;
  552. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  553. begin
  554. Result := Cardinal(a) <= b;
  555. end;
  556. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  557. begin
  558. Result := a <= Cardinal(b);
  559. end;
  560. type
  561. TBitOrder = 0..7;
  562. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  563. begin
  564. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  565. end;
  566. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  567. begin
  568. if AValue then
  569. AData := AData or (1 shl (ABit mod 8))
  570. else
  571. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  572. end;
  573. var
  574. CollationTable : array of PUCA_DataBook;
  575. function IndexOfCollation(const AName : string) : Integer;
  576. var
  577. i, c : Integer;
  578. p : Pointer;
  579. begin
  580. c := Length(AName);
  581. p := @AName[1];
  582. for i := 0 to Length(CollationTable) - 1 do begin
  583. if (Length(CollationTable[i]^.CollationName) = c) and
  584. (CompareByte((CollationTable[i]^.CollationName[1]),p^,c)=0)
  585. then
  586. exit(i);
  587. end;
  588. Result := -1;
  589. end;
  590. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  591. var
  592. i : Integer;
  593. begin
  594. Result := (IndexOfCollation(ACollation^.CollationName) = -1);
  595. if Result then begin
  596. i := Length(CollationTable);
  597. SetLength(CollationTable,(i+1));
  598. CollationTable[i] := ACollation;
  599. end;
  600. end;
  601. function RegisterCollation(const ADirectory, ALanguage : string) : Boolean;
  602. var
  603. cl : PUCA_DataBook;
  604. begin
  605. cl := LoadCollation(ADirectory,ALanguage);
  606. if (cl = nil) then
  607. exit(False);
  608. try
  609. Result := RegisterCollation(cl);
  610. except
  611. FreeCollation(cl);
  612. raise;
  613. end;
  614. if not Result then
  615. FreeCollation(cl);
  616. end;
  617. function UnregisterCollation(const AName : ansistring): Boolean;
  618. var
  619. i, c : Integer;
  620. begin
  621. i := IndexOfCollation(AName);
  622. Result := (i >= 0);
  623. if Result then begin
  624. c := Length(CollationTable);
  625. if (c = 1) then begin
  626. SetLength(CollationTable,0);
  627. end else begin
  628. CollationTable[i] := CollationTable[c-1];
  629. SetLength(CollationTable,(c-1));
  630. end;
  631. end;
  632. end;
  633. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  634. var
  635. i : Integer;
  636. cl : PUCA_DataBook;
  637. begin
  638. if AFreeDynamicCollations then begin
  639. for i := Low(CollationTable) to High(CollationTable) do begin
  640. if CollationTable[i].Dynamic then begin
  641. cl := CollationTable[i];
  642. CollationTable[i] := nil;
  643. FreeCollation(cl);
  644. end;
  645. end;
  646. end;
  647. SetLength(CollationTable,0);
  648. end;
  649. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  650. var
  651. i : Integer;
  652. begin
  653. i := IndexOfCollation(AName);
  654. if (i = -1) then
  655. Result := nil
  656. else
  657. Result := CollationTable[i];
  658. end;
  659. function GetCollationCount() : Integer;
  660. begin
  661. Result := Length(CollationTable);
  662. end;
  663. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  664. begin
  665. if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
  666. Result := nil
  667. else
  668. Result := CollationTable[AIndex];
  669. end;
  670. procedure PrepareCollation(
  671. ACollation : PUCA_DataBook;
  672. const ABaseName : ansistring;
  673. const AChangedFields : TCollationFields
  674. );
  675. var
  676. s : ansistring;
  677. p, base : PUCA_DataBook;
  678. begin
  679. if (ABaseName <> '') then
  680. s := ABaseName
  681. else
  682. s := ROOT_COLLATION_NAME;
  683. p := ACollation;
  684. base := FindCollation(s);
  685. if (base = nil) then
  686. Error(reCodesetConversion);
  687. p^.Base := base;
  688. if not(TCollationField.BackWard in AChangedFields) then
  689. p^.Backwards := base^.Backwards;
  690. if not(TCollationField.VariableLowLimit in AChangedFields) then
  691. p^.VariableLowLimit := base^.VariableLowLimit;
  692. if not(TCollationField.VariableHighLimit in AChangedFields) then
  693. p^.VariableLowLimit := base^.VariableHighLimit;
  694. end;
  695. type
  696. TSerializedCollationHeader = packed record
  697. Base : TCollationName;
  698. Version : TCollationName;
  699. CollationName : TCollationName;
  700. VariableWeight : Byte;
  701. Backwards : Byte;
  702. BMP_Table1Length : DWord;
  703. BMP_Table2Length : DWord;
  704. OBMP_Table1Length : DWord;
  705. OBMP_Table2Length : DWord;
  706. PropCount : DWord;
  707. VariableLowLimit : Word;
  708. VariableHighLimit : Word;
  709. ChangedFields : Byte;
  710. end;
  711. PSerializedCollationHeader = ^TSerializedCollationHeader;
  712. procedure FreeCollation(AItem : PUCA_DataBook);
  713. var
  714. h : PSerializedCollationHeader;
  715. begin
  716. if (AItem = nil) or not(AItem^.Dynamic) then
  717. exit;
  718. h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
  719. if (AItem^.BMP_Table1 <> nil) then
  720. FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
  721. if (AItem^.BMP_Table2 <> nil) then
  722. FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
  723. if (AItem^.OBMP_Table1 <> nil) then
  724. FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
  725. if (AItem^.OBMP_Table2 <> nil) then
  726. FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
  727. if (AItem^.Props <> nil) then
  728. FreeMem(AItem^.Props,h^.PropCount);
  729. FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  730. end;
  731. function LoadCollation(
  732. const AData : Pointer;
  733. const ADataLength : Integer
  734. ) : PUCA_DataBook;
  735. var
  736. dataPointer : PByte;
  737. readedLength : LongInt;
  738. function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
  739. begin
  740. Result := (readedLength + ALength) <= ADataLength;
  741. if not result then
  742. exit;
  743. Move(dataPointer^,ADest^,ALength);
  744. Inc(dataPointer,ALength);
  745. readedLength := readedLength + ALength;
  746. end;
  747. var
  748. r : PUCA_DataBook;
  749. h : PSerializedCollationHeader;
  750. cfs : TCollationFields;
  751. i : Integer;
  752. baseName : TCollationName;
  753. begin
  754. readedLength := 0;
  755. dataPointer := AData;
  756. r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  757. try
  758. h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
  759. if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
  760. exit;
  761. r^.Version := h^.Version;
  762. r^.CollationName := h^.CollationName;
  763. r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
  764. r^.Backwards[0] := IsBitON(h^.Backwards,0);
  765. r^.Backwards[1] := IsBitON(h^.Backwards,1);
  766. r^.Backwards[2] := IsBitON(h^.Backwards,2);
  767. r^.Backwards[3] := IsBitON(h^.Backwards,3);
  768. if (h^.BMP_Table1Length > 0) then begin
  769. r^.BMP_Table1 := GetMem(h^.BMP_Table1Length);
  770. if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
  771. exit;
  772. end;
  773. if (h^.BMP_Table2Length > 0) then begin
  774. r^.BMP_Table2 := GetMem(h^.BMP_Table2Length);
  775. if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
  776. exit;
  777. end;
  778. if (h^.OBMP_Table1Length > 0) then begin
  779. r^.OBMP_Table1 := GetMem(h^.OBMP_Table1Length);
  780. if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
  781. exit;
  782. end;
  783. if (h^.OBMP_Table2Length > 0) then begin
  784. r^.OBMP_Table2 := GetMem(h^.OBMP_Table2Length);
  785. if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
  786. exit;
  787. end;
  788. r^.PropCount := h^.PropCount;
  789. if (h^.PropCount > 0) then begin
  790. r^.Props := GetMem(h^.PropCount);
  791. if not ReadBuffer(r^.Props,h^.PropCount) then
  792. exit;
  793. end;
  794. r^.VariableLowLimit := h^.VariableLowLimit;
  795. r^.VariableHighLimit := h^.VariableHighLimit;
  796. cfs := [];
  797. for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
  798. if IsBitON(h^.ChangedFields,i) then
  799. cfs := cfs + [TCollationField(i)];
  800. end;
  801. if (h^.Base <> '') then
  802. baseName := h^.Base
  803. else if (h^.CollationName <> ROOT_COLLATION_NAME) then
  804. baseName := ROOT_COLLATION_NAME
  805. else
  806. baseName := '';
  807. if (baseName <> '') then
  808. PrepareCollation(r,baseName,cfs);
  809. r^.Dynamic := True;
  810. Result := r;
  811. except
  812. FreeCollation(r);
  813. raise;
  814. end;
  815. end;
  816. {$PUSH}
  817. function LoadCollation(const AFileName : string) : PUCA_DataBook;
  818. const
  819. BLOCK_SIZE = 16*1024;
  820. var
  821. f : File of Byte;
  822. locSize, locReaded, c : LongInt;
  823. locBuffer : PByte;
  824. locBlockSize : LongInt;
  825. begin
  826. Result := nil;
  827. {$I-}
  828. if (AFileName = '') then
  829. exit;
  830. Assign(f,AFileName);
  831. Reset(f);
  832. try
  833. if (IOResult <> 0) then
  834. exit;
  835. locSize := FileSize(f);
  836. if (locSize < SizeOf(TSerializedCollationHeader)) then
  837. exit;
  838. locBuffer := GetMem(locSize);
  839. try
  840. locBlockSize := BLOCK_SIZE;
  841. locReaded := 0;
  842. while (locReaded < locSize) do begin
  843. if (locBlockSize > (locSize-locReaded)) then
  844. locBlockSize := locSize-locReaded;
  845. BlockRead(f,locBuffer[locReaded],locBlockSize,c);
  846. if (IOResult <> 0) or (c <= 0) then
  847. exit;
  848. locReaded := locReaded + c;
  849. end;
  850. Result := LoadCollation(locBuffer,locSize);
  851. finally
  852. FreeMem(locBuffer,locSize);
  853. end;
  854. finally
  855. Close(f);
  856. end;
  857. end;
  858. {$POP}
  859. function LoadCollation(const ADirectory, ALanguage : string) : PUCA_DataBook;
  860. var
  861. fileName : string;
  862. begin
  863. fileName := ADirectory;
  864. if (fileName <> '') then begin
  865. if (fileName[Length(fileName)] <> DirectorySeparator) then
  866. fileName := fileName + DirectorySeparator;
  867. end;
  868. fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
  869. Result := LoadCollation(fileName);
  870. end;
  871. {$INCLUDE unicodedata.inc}
  872. {$IFDEF ENDIAN_LITTLE}
  873. {$INCLUDE unicodedata_le.inc}
  874. {$ENDIF ENDIAN_LITTLE}
  875. {$IFDEF ENDIAN_BIG}
  876. {$INCLUDE unicodedata_be.inc}
  877. {$ENDIF ENDIAN_BIG}
  878. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  879. begin
  880. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  881. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  882. end;
  883. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  884. begin
  885. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  886. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  887. end;
  888. function UnicodeIsSurrogatePair(
  889. const AHighSurrogate,
  890. ALowSurrogate : UnicodeChar
  891. ) : Boolean;
  892. begin
  893. Result :=
  894. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  895. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  896. ) and
  897. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  898. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  899. )
  900. end;
  901. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  902. begin
  903. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  904. (Word(AValue) <= HIGH_SURROGATE_END);
  905. end;
  906. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  907. begin
  908. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  909. (Word(AValue) <= LOW_SURROGATE_END);
  910. end;
  911. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  912. begin
  913. Result:=
  914. @UC_PROP_ARRAY[
  915. UC_TABLE_3[
  916. UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
  917. [lo(ACodePoint) shr 4]
  918. ][lo(ACodePoint) and $F]
  919. ]; {
  920. @UC_PROP_ARRAY[
  921. UC_TABLE_2[
  922. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  923. WordRec(ACodePoint).Lo
  924. ]
  925. ];}
  926. end;
  927. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  928. begin
  929. Result:=
  930. @UC_PROP_ARRAY[
  931. UCO_TABLE_3[
  932. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  933. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  934. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  935. ]; {
  936. Result:=
  937. @UC_PROP_ARRAY[
  938. UCO_TABLE_2[
  939. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  940. Word(ALowS) - LOW_SURROGATE_BEGIN
  941. ]
  942. ]; }
  943. end;
  944. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  945. var
  946. l, h : UnicodeChar;
  947. begin
  948. if (ACodePoint <= High(Word)) then
  949. exit(GetProps(Word(ACodePoint)));
  950. FromUCS4(ACodePoint,h,l);
  951. Result := GetProps(h,l);
  952. end;
  953. function UnicodeToUpper(
  954. const AString : UnicodeString;
  955. const AIgnoreInvalidSequence : Boolean;
  956. out AResultString : UnicodeString
  957. ) : Integer;
  958. var
  959. i, c : SizeInt;
  960. pp, pr : PUnicodeChar;
  961. pu : PUC_Prop;
  962. locIsSurrogate : Boolean;
  963. r : UnicodeString;
  964. begin
  965. c := Length(AString);
  966. SetLength(r,2*c);
  967. if (c > 0) then begin
  968. pp := @AString[1];
  969. pr := @r[1];
  970. i := 1;
  971. while (i <= c) do begin
  972. pu := GetProps(Word(pp^));
  973. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  974. if locIsSurrogate then begin
  975. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  976. if AIgnoreInvalidSequence then begin
  977. pr^ := pp^;
  978. Inc(pp);
  979. Inc(pr);
  980. Inc(i);
  981. Continue;
  982. end;
  983. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  984. end;
  985. pu := GetProps(pp^,AString[i+1]);
  986. end;
  987. if (pu^.SimpleUpperCase = 0) then begin
  988. pr^ := pp^;
  989. if locIsSurrogate then begin
  990. Inc(pp);
  991. Inc(pr);
  992. Inc(i);
  993. pr^ := pp^;
  994. end;
  995. end else begin
  996. if (pu^.SimpleUpperCase <= $FFFF) then begin
  997. pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
  998. end else begin
  999. FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  1000. Inc(pr);
  1001. end;
  1002. if locIsSurrogate then begin
  1003. Inc(pp);
  1004. Inc(i);
  1005. end;
  1006. end;
  1007. Inc(pp);
  1008. Inc(pr);
  1009. Inc(i);
  1010. end;
  1011. Dec(pp);
  1012. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1013. SetLength(r,i);
  1014. AResultString := r;
  1015. end;
  1016. Result := 0;
  1017. end;
  1018. function UnicodeToLower(
  1019. const AString : UnicodeString;
  1020. const AIgnoreInvalidSequence : Boolean;
  1021. out AResultString : UnicodeString
  1022. ) : Integer;
  1023. var
  1024. i, c : SizeInt;
  1025. pp, pr : PUnicodeChar;
  1026. pu : PUC_Prop;
  1027. locIsSurrogate : Boolean;
  1028. r : UnicodeString;
  1029. begin
  1030. c := Length(AString);
  1031. SetLength(r,2*c);
  1032. if (c > 0) then begin
  1033. pp := @AString[1];
  1034. pr := @r[1];
  1035. i := 1;
  1036. while (i <= c) do begin
  1037. pu := GetProps(Word(pp^));
  1038. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1039. if locIsSurrogate then begin
  1040. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  1041. if AIgnoreInvalidSequence then begin
  1042. pr^ := pp^;
  1043. Inc(pp);
  1044. Inc(pr);
  1045. Inc(i);
  1046. Continue;
  1047. end;
  1048. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  1049. end;
  1050. pu := GetProps(pp^,AString[i+1]);
  1051. end;
  1052. if (pu^.SimpleLowerCase = 0) then begin
  1053. pr^ := pp^;
  1054. if locIsSurrogate then begin
  1055. Inc(pp);
  1056. Inc(pr);
  1057. Inc(i);
  1058. pr^ := pp^;
  1059. end;
  1060. end else begin
  1061. if (pu^.SimpleLowerCase <= $FFFF) then begin
  1062. pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
  1063. end else begin
  1064. FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  1065. Inc(pr);
  1066. end;
  1067. if locIsSurrogate then begin
  1068. Inc(pp);
  1069. Inc(i);
  1070. end;
  1071. end;
  1072. Inc(pp);
  1073. Inc(pr);
  1074. Inc(i);
  1075. end;
  1076. Dec(pp);
  1077. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1078. SetLength(r,i);
  1079. AResultString := r;
  1080. end;
  1081. Result := 0;
  1082. end;
  1083. //----------------------------------------------------------------------
  1084. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  1085. const
  1086. SBase = $AC00;
  1087. LBase = $1100;
  1088. VBase = $1161;
  1089. TBase = $11A7;
  1090. LCount = 19;
  1091. VCount = 21;
  1092. TCount = 28;
  1093. NCount = VCount * TCount; // 588
  1094. SCount = LCount * NCount; // 11172
  1095. var
  1096. SIndex, L, V, T : Integer;
  1097. begin
  1098. SIndex := AChar - SBase;
  1099. if (SIndex < 0) or (SIndex >= SCount) then begin
  1100. ABuffer^ := AChar;
  1101. exit(1);
  1102. end;
  1103. L := LBase + SIndex div NCount;
  1104. V := VBase + (SIndex mod NCount) div TCount;
  1105. T := TBase + SIndex mod TCount;
  1106. ABuffer[0] := L;
  1107. ABuffer[1] := V;
  1108. Result := 2;
  1109. if (T <> TBase) then begin
  1110. ABuffer[2] := T;
  1111. Inc(Result);
  1112. end;
  1113. end;
  1114. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  1115. var
  1116. locStack : array[0..23] of Cardinal;
  1117. locStackIdx : Integer;
  1118. ResultBuffer : array[0..23] of Cardinal;
  1119. ResultIdx : Integer;
  1120. procedure AddCompositionToStack(const AIndex : Integer);
  1121. var
  1122. pdecIdx : ^TDecompositionIndexRec;
  1123. k, kc : Integer;
  1124. pu : ^UInt24;
  1125. begin
  1126. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  1127. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  1128. kc := pdecIdx^.Length;
  1129. Inc(pu,kc);
  1130. for k := 1 to kc do begin
  1131. Dec(pu);
  1132. locStack[locStackIdx + k] := pu^;
  1133. end;
  1134. locStackIdx := locStackIdx + kc;
  1135. end;
  1136. procedure AddResult(const AChar : Cardinal);inline;
  1137. begin
  1138. Inc(ResultIdx);
  1139. ResultBuffer[ResultIdx] := AChar;
  1140. end;
  1141. function PopStack() : Cardinal;inline;
  1142. begin
  1143. Result := locStack[locStackIdx];
  1144. Dec(locStackIdx);
  1145. end;
  1146. var
  1147. cu : Cardinal;
  1148. decIdx : SmallInt;
  1149. locIsWord : Boolean;
  1150. i : Integer;
  1151. p : PUnicodeChar;
  1152. begin
  1153. ResultIdx := -1;
  1154. locStackIdx := -1;
  1155. AddCompositionToStack(ADecomposeIndex);
  1156. while (locStackIdx >= 0) do begin
  1157. cu := PopStack();
  1158. locIsWord := (cu <= MAX_WORD);
  1159. if locIsWord then
  1160. decIdx := GetProps(Word(cu))^.DecompositionID
  1161. else
  1162. decIdx := GetProps(cu)^.DecompositionID;
  1163. if (decIdx = -1) then
  1164. AddResult(cu)
  1165. else
  1166. AddCompositionToStack(decIdx);
  1167. end;
  1168. p := ABuffer;
  1169. Result := 0;
  1170. for i := 0 to ResultIdx do begin
  1171. cu := ResultBuffer[i];
  1172. if (cu <= MAX_WORD) then begin
  1173. p[0] := UnicodeChar(Word(cu));
  1174. Inc(p);
  1175. end else begin
  1176. FromUCS4(cu,p[0],p[1]);
  1177. Inc(p,2);
  1178. Inc(Result);
  1179. end;
  1180. end;
  1181. Result := Result + ResultIdx + 1;
  1182. end;
  1183. procedure CanonicalOrder(var AString : UnicodeString);
  1184. begin
  1185. CanonicalOrder(@AString[1],Length(AString));
  1186. end;
  1187. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  1188. var
  1189. i, c : SizeInt;
  1190. p, q : PUnicodeChar;
  1191. locIsSurrogateP, locIsSurrogateQ : Boolean;
  1192. procedure Swap();
  1193. var
  1194. t, t1 : UnicodeChar;
  1195. begin
  1196. if not locIsSurrogateP then begin
  1197. if not locIsSurrogateQ then begin
  1198. t := p^;
  1199. p^ := q^;
  1200. q^ := t;
  1201. exit;
  1202. end;
  1203. t := p^;
  1204. p[0] := q[0];
  1205. p[1] := q[1];
  1206. q[1] := t;
  1207. exit;
  1208. end;
  1209. if not locIsSurrogateQ then begin
  1210. t := q[0];
  1211. p[2] := p[1];
  1212. p[1] := p[0];
  1213. p[0] := t;
  1214. exit;
  1215. end;
  1216. t := p[0];
  1217. t1 := p[1];
  1218. p[0] := q[0];
  1219. p[1] := q[1];
  1220. q[0] := t;
  1221. q[1] := t1;
  1222. end;
  1223. var
  1224. pu : PUC_Prop;
  1225. cccp, cccq : Byte;
  1226. begin
  1227. c := ALength;
  1228. if (c < 2) then
  1229. exit;
  1230. p := AStr;
  1231. i := 1;
  1232. while (i < c) do begin
  1233. pu := GetProps(Word(p^));
  1234. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  1235. if locIsSurrogateP then begin
  1236. if (i = (c - 1)) then
  1237. Break;
  1238. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  1239. Inc(p);
  1240. Inc(i);
  1241. Continue;
  1242. end;
  1243. pu := GetProps(p[0],p[1]);
  1244. end;
  1245. if (pu^.CCC > 0) then begin
  1246. cccp := pu^.CCC;
  1247. if locIsSurrogateP then
  1248. q := p + 2
  1249. else
  1250. q := p + 1;
  1251. pu := GetProps(Word(q^));
  1252. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  1253. if locIsSurrogateQ then begin
  1254. if (i = c) then
  1255. Break;
  1256. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  1257. Inc(p);
  1258. Inc(i);
  1259. Continue;
  1260. end;
  1261. pu := GetProps(q[0],q[1]);
  1262. end;
  1263. cccq := pu^.CCC;
  1264. if (cccq > 0) and (cccp > cccq) then begin
  1265. Swap();
  1266. if (i > 1) then begin
  1267. Dec(p);
  1268. Dec(i);
  1269. pu := GetProps(Word(p^));
  1270. if (pu^.Category = UGC_Surrogate) then begin
  1271. if (i > 1) then begin
  1272. Dec(p);
  1273. Dec(i);
  1274. end;
  1275. end;
  1276. Continue;
  1277. end;
  1278. end;
  1279. end;
  1280. if locIsSurrogateP then begin
  1281. Inc(p);
  1282. Inc(i);
  1283. end;
  1284. Inc(p);
  1285. Inc(i);
  1286. end;
  1287. end;
  1288. function DecomposeCanonical(const AString : UnicodeString) : UnicodeString;
  1289. begin
  1290. Result := DecomposeCanonical(@AString[1],Length(AString));
  1291. end;
  1292. function DecomposeCanonical(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  1293. const MAX_EXPAND = 3;
  1294. var
  1295. i, c, kc, k : SizeInt;
  1296. pp, pr : PUnicodeChar;
  1297. pu : PUC_Prop;
  1298. locIsSurrogate : Boolean;
  1299. cpArray : array[0..7] of Cardinal;
  1300. cp : Cardinal;
  1301. begin
  1302. c := ALength;
  1303. SetLength(Result,(MAX_EXPAND*c));
  1304. if (c > 0) then begin
  1305. pp := AStr;
  1306. pr := @Result[1];
  1307. i := 1;
  1308. while (i <= c) do begin
  1309. pu := GetProps(Word(pp^));
  1310. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1311. if locIsSurrogate then begin
  1312. if (i = c) then
  1313. Break;
  1314. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  1315. pr^ := pp^;
  1316. Inc(pp);
  1317. Inc(pr);
  1318. Inc(i);
  1319. Continue;
  1320. end;
  1321. pu := GetProps(pp[0],pp[1]);
  1322. end;
  1323. if pu^.HangulSyllable then begin
  1324. if locIsSurrogate then begin
  1325. cp := ToUCS4(pp[0],pp[1]);
  1326. Inc(pp);
  1327. Inc(i);
  1328. end else begin
  1329. cp := Word(pp^);
  1330. end;
  1331. kc := DecomposeHangul(cp,@cpArray[0]);
  1332. for k := 0 to kc - 1 do begin
  1333. if (cpArray[k] <= MAX_WORD) then begin
  1334. pr^ := UnicodeChar(Word(cpArray[k]));
  1335. pr := pr + 1;
  1336. end else begin
  1337. FromUCS4(cpArray[k],pr[0],pr[1]);
  1338. pr := pr + 2;
  1339. end;
  1340. end;
  1341. if (kc > 0) then
  1342. Dec(pr);
  1343. end else begin
  1344. if (pu^.DecompositionID = -1) then begin
  1345. pr^ := pp^;
  1346. if locIsSurrogate then begin
  1347. Inc(pp);
  1348. Inc(pr);
  1349. Inc(i);
  1350. pr^ := pp^;
  1351. end;
  1352. end else begin
  1353. k := Decompose(pu^.DecompositionID,pr);
  1354. pr := pr + (k - 1);
  1355. if locIsSurrogate then begin
  1356. Inc(pp);
  1357. Inc(i);
  1358. end;
  1359. end;
  1360. end;
  1361. Inc(pp);
  1362. Inc(pr);
  1363. Inc(i);
  1364. end;
  1365. Dec(pp);
  1366. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  1367. SetLength(Result,i);
  1368. CanonicalOrder(@Result[1],Length(Result));
  1369. end;
  1370. end;
  1371. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  1372. begin
  1373. if (ALength < 1) then
  1374. exit('');
  1375. Result := DecomposeCanonical(AStr,ALength);
  1376. CanonicalOrder(@Result[1],Length(Result));
  1377. end;
  1378. //Canonical Decomposition + _ordering_
  1379. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  1380. begin
  1381. Result := NormalizeNFD(@AString[1],Length(AString));
  1382. end;
  1383. { TUCA_PropItemContextTreeNodeRec }
  1384. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  1385. begin
  1386. if (Self.Left = 0) then
  1387. Result := nil
  1388. else
  1389. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  1390. end;
  1391. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  1392. begin
  1393. if (Self.Right = 0) then
  1394. Result := nil
  1395. else
  1396. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  1397. end;
  1398. { TUCA_PropItemContextRec }
  1399. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  1400. begin
  1401. Result := PUInt24(
  1402. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  1403. SizeOf(Self.WeightCount)
  1404. );
  1405. end;
  1406. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  1407. begin
  1408. Result := PUCA_PropWeights(
  1409. PtrUInt(@Self) +
  1410. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  1411. (Self.CodePointCount*SizeOf(UInt24))
  1412. );
  1413. end;
  1414. { TUCA_PropItemContextTreeRec }
  1415. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  1416. begin
  1417. if (Size = 0) then
  1418. Result := nil
  1419. else
  1420. Result := PUCA_PropItemContextTreeNodeRec(
  1421. PtrUInt(
  1422. PtrUInt(@Self) + SizeOf(UInt24){Size}
  1423. )
  1424. );
  1425. end;
  1426. function CompareCodePoints(
  1427. A : PUInt24; LA : Integer;
  1428. B : PUInt24; LB : Integer
  1429. ) : Integer;
  1430. var
  1431. i, hb : Integer;
  1432. begin
  1433. if (A = B) then
  1434. exit(0);
  1435. Result := 1;
  1436. hb := LB - 1;
  1437. for i := 0 to LA - 1 do begin
  1438. if (i > hb) then
  1439. exit;
  1440. if (A[i] < B[i]) then
  1441. exit(-1);
  1442. if (A[i] > B[i]) then
  1443. exit(1);
  1444. end;
  1445. if (LA = LB) then
  1446. exit(0);
  1447. exit(-1);
  1448. end;
  1449. function TUCA_PropItemContextTreeRec.Find(
  1450. const AChars : PUInt24;
  1451. const ACharCount : Integer;
  1452. out ANode : PUCA_PropItemContextTreeNodeRec
  1453. ) : Boolean;
  1454. var
  1455. t : PUCA_PropItemContextTreeNodeRec;
  1456. begin
  1457. t := Data;
  1458. while (t <> nil) do begin
  1459. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  1460. 0 : Break;
  1461. -1 : t := t^.GetLeftNode();
  1462. else
  1463. t := t^.GetRightNode();
  1464. end;
  1465. end;
  1466. Result := (t <> nil);
  1467. if Result then
  1468. ANode := t;
  1469. end;
  1470. { TUC_Prop }
  1471. function TUC_Prop.GetCategory: Byte;
  1472. begin
  1473. Result := Byte((CategoryData and Byte($F8)) shr 3);
  1474. end;
  1475. function TUC_Prop.GetNumericValue: Double;
  1476. begin
  1477. Result := UC_NUMERIC_ARRAY[NumericIndex];
  1478. end;
  1479. procedure TUC_Prop.SetCategory(AValue: Byte);
  1480. begin
  1481. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  1482. end;
  1483. function TUC_Prop.GetWhiteSpace: Boolean;
  1484. begin
  1485. Result := IsBitON(CategoryData,0);
  1486. end;
  1487. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  1488. begin
  1489. SetBit(CategoryData,0,AValue);
  1490. end;
  1491. function TUC_Prop.GetHangulSyllable: Boolean;
  1492. begin
  1493. Result := IsBitON(CategoryData,1);
  1494. end;
  1495. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  1496. begin
  1497. SetBit(CategoryData,1,AValue);
  1498. end;
  1499. { TUCA_DataBook }
  1500. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  1501. begin
  1502. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  1503. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  1504. end;
  1505. { TUCA_PropItemRec }
  1506. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  1507. begin
  1508. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  1509. end;
  1510. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  1511. begin
  1512. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  1513. end;
  1514. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  1515. begin
  1516. if HasCodePoint() then begin
  1517. if Contextual then
  1518. Result := Unaligned(
  1519. PUInt24(
  1520. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  1521. Cardinal(GetContext()^.Size)
  1522. )^
  1523. )
  1524. else
  1525. Result := Unaligned(PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^)
  1526. end else begin
  1527. {$ifdef uni_debug}
  1528. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  1529. {$else uni_debug}
  1530. Result := ZERO_UINT24;
  1531. {$endif uni_debug}
  1532. end
  1533. end;
  1534. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  1535. begin
  1536. Result := IsBitON(Flags,FLAG_CODEPOINT);
  1537. end;
  1538. function TUCA_PropItemRec.IsValid() : Boolean;
  1539. begin
  1540. Result := IsBitON(Flags,FLAG_VALID);
  1541. end;
  1542. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  1543. begin
  1544. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1545. end;}
  1546. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  1547. var
  1548. c : Integer;
  1549. p : PByte;
  1550. pd : PUCA_PropWeights;
  1551. begin
  1552. c := WeightLength;
  1553. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1554. pd := ADest;
  1555. pd^.Weights[0] := Unaligned(PWord(p)^);
  1556. p := p + 2;
  1557. if not IsWeightCompress_1() then begin
  1558. pd^.Weights[1] := Unaligned(PWord(p)^);
  1559. p := p + 2;
  1560. end else begin
  1561. pd^.Weights[1] := p^;
  1562. p := p + 1;
  1563. end;
  1564. if not IsWeightCompress_2() then begin
  1565. pd^.Weights[2] := Unaligned(PWord(p)^);
  1566. p := p + 2;
  1567. end else begin
  1568. pd^.Weights[2] := p^;
  1569. p := p + 1;
  1570. end;
  1571. if (c > 1) then
  1572. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  1573. end;
  1574. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  1575. begin
  1576. Result := SizeOf(TUCA_PropItemRec);
  1577. if (WeightLength > 0) then begin
  1578. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  1579. if IsWeightCompress_1() then
  1580. Result := Result - 1;
  1581. if IsWeightCompress_2() then
  1582. Result := Result - 1;
  1583. end;
  1584. if HasCodePoint() then
  1585. Result := Result + SizeOf(UInt24);
  1586. if Contextual then
  1587. Result := Result + Cardinal(GetContext()^.Size);
  1588. end;
  1589. function TUCA_PropItemRec.GetContextual: Boolean;
  1590. begin
  1591. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  1592. end;
  1593. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  1594. var
  1595. p : PtrUInt;
  1596. begin
  1597. if not Contextual then
  1598. exit(nil);
  1599. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  1600. if IsBitON(Flags,FLAG_CODEPOINT) then
  1601. p := p + SizeOf(UInt24);
  1602. Result := PUCA_PropItemContextTreeRec(p);
  1603. end;
  1604. function TUCA_PropItemRec.IsDeleted() : Boolean;
  1605. begin
  1606. Result := IsBitON(Flags,FLAG_DELETION);
  1607. end;
  1608. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  1609. var
  1610. i : Cardinal;
  1611. begin
  1612. if (ABook^.BMP_Table2 = nil) then
  1613. exit(nil);
  1614. i := ABook^.BMP_Table2[
  1615. (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
  1616. Lo(Word(AChar))
  1617. ];
  1618. if (i > 0) then
  1619. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1620. else
  1621. Result := nil;
  1622. end;
  1623. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  1624. var
  1625. i : Cardinal;
  1626. begin
  1627. if (ABook^.OBMP_Table2 = nil) then
  1628. exit(nil);
  1629. i := ABook^.OBMP_Table2[
  1630. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1631. Word(ALowS) - LOW_SURROGATE_BEGIN
  1632. ];
  1633. if (i > 0) then
  1634. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1635. else
  1636. Result := nil;
  1637. end;
  1638. {$include weight_derivation.inc}
  1639. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  1640. var
  1641. bb : TUCASortKey;
  1642. begin
  1643. SetLength(bb,Length(B));
  1644. if (Length(bb) > 0) then
  1645. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  1646. Result := CompareSortKey(A,bb);
  1647. end;
  1648. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  1649. var
  1650. i, hb : Integer;
  1651. begin
  1652. if (Pointer(A) = Pointer(B)) then
  1653. exit(0);
  1654. Result := 1;
  1655. hb := Length(B) - 1;
  1656. for i := 0 to Length(A) - 1 do begin
  1657. if (i > hb) then
  1658. exit;
  1659. if (A[i] < B[i]) then
  1660. exit(-1);
  1661. if (A[i] > B[i]) then
  1662. exit(1);
  1663. end;
  1664. if (Length(A) = Length(B)) then
  1665. exit(0);
  1666. exit(-1);
  1667. end;
  1668. type
  1669. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  1670. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1671. var
  1672. r : TUCASortKey;
  1673. i, c, k, ral, levelCount : Integer;
  1674. pce : PUCA_PropWeights;
  1675. begin
  1676. c := Length(ACEList);
  1677. if (c = 0) then
  1678. exit(nil);
  1679. levelCount := Length(ACEList[0].Weights);
  1680. SetLength(r,(levelCount*c + levelCount));
  1681. ral := 0;
  1682. for i := 0 to levelCount - 1 do begin
  1683. if not ACollation^.Backwards[i] then begin
  1684. pce := @ACEList[0];
  1685. for k := 0 to c - 1 do begin
  1686. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1687. r[ral] := pce^.Weights[i];
  1688. ral := ral + 1;
  1689. end;
  1690. pce := pce + 1;
  1691. end;
  1692. end else begin
  1693. pce := @ACEList[c-1];
  1694. for k := 0 to c - 1 do begin
  1695. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1696. r[ral] := pce^.Weights[i];
  1697. ral := ral + 1;
  1698. end;
  1699. pce := pce - 1;
  1700. end;
  1701. end;
  1702. r[ral] := 0;
  1703. ral := ral + 1;
  1704. end;
  1705. ral := ral - 1;
  1706. SetLength(r,ral);
  1707. Result := r;
  1708. end;
  1709. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1710. var
  1711. r : TUCASortKey;
  1712. i, c, k, ral, levelCount : Integer;
  1713. pce : PUCA_PropWeights;
  1714. begin
  1715. c := Length(ACEList);
  1716. if (c = 0) then
  1717. exit(nil);
  1718. levelCount := Length(ACEList[0].Weights);
  1719. SetLength(r,(levelCount*c + levelCount));
  1720. ral := 0;
  1721. for i := 0 to levelCount - 1 do begin
  1722. if not ACollation^.Backwards[i] then begin
  1723. pce := @ACEList[0];
  1724. for k := 0 to c - 1 do begin
  1725. if (pce^.Weights[i] <> 0) then begin
  1726. r[ral] := pce^.Weights[i];
  1727. ral := ral + 1;
  1728. end;
  1729. pce := pce + 1;
  1730. end;
  1731. end else begin
  1732. pce := @ACEList[c-1];
  1733. for k := 0 to c - 1 do begin
  1734. if (pce^.Weights[i] <> 0) then begin
  1735. r[ral] := pce^.Weights[i];
  1736. ral := ral + 1;
  1737. end;
  1738. pce := pce - 1;
  1739. end;
  1740. end;
  1741. r[ral] := 0;
  1742. ral := ral + 1;
  1743. end;
  1744. ral := ral - 1;
  1745. SetLength(r,ral);
  1746. Result := r;
  1747. end;
  1748. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1749. var
  1750. r : TUCASortKey;
  1751. i, c, k, ral, levelCount : Integer;
  1752. pce : PUCA_PropWeights;
  1753. variableState : Boolean;
  1754. begin
  1755. c := Length(ACEList);
  1756. if (c = 0) then
  1757. exit(nil);
  1758. levelCount := Length(ACEList[0].Weights);
  1759. SetLength(r,(levelCount*c + levelCount));
  1760. ral := 0;
  1761. for i := 0 to levelCount - 1 do begin
  1762. if not ACollation^.Backwards[i] then begin
  1763. variableState := False;
  1764. pce := @ACEList[0];
  1765. for k := 0 to c - 1 do begin
  1766. if not ACollation^.IsVariable(pce) then begin
  1767. if (pce^.Weights[0] <> 0) then
  1768. variableState := False;
  1769. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1770. r[ral] := pce^.Weights[i];
  1771. ral := ral + 1;
  1772. end;
  1773. end else begin
  1774. variableState := True;
  1775. end;
  1776. pce := pce + 1;
  1777. end;
  1778. end else begin
  1779. pce := @ACEList[c-1];
  1780. for k := 0 to c - 1 do begin
  1781. if not ACollation^.IsVariable(pce) then begin
  1782. if (pce^.Weights[0] <> 0) then
  1783. variableState := False;
  1784. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1785. r[ral] := pce^.Weights[i];
  1786. ral := ral + 1;
  1787. end;
  1788. end else begin
  1789. variableState := True;
  1790. end;
  1791. pce := pce - 1;
  1792. end;
  1793. end;
  1794. r[ral] := 0;
  1795. ral := ral + 1;
  1796. end;
  1797. ral := ral - 1;
  1798. SetLength(r,ral);
  1799. Result := r;
  1800. end;
  1801. function FormKeyShiftedTrimmed(
  1802. const ACEList : TUCA_PropWeightsArray;
  1803. const ACollation : PUCA_DataBook
  1804. ) : TUCASortKey;
  1805. var
  1806. i : Integer;
  1807. p : ^TUCASortKeyItem;
  1808. begin
  1809. Result := FormKeyShifted(ACEList,ACollation);
  1810. i := Length(Result) - 1;
  1811. if (i >= 0) then begin
  1812. p := @Result[i];
  1813. while (i >= 0) do begin
  1814. if (p^ <> $FFFF) then
  1815. Break;
  1816. Dec(i);
  1817. Dec(p);
  1818. end;
  1819. if ((i+1) < Length(Result)) then
  1820. SetLength(Result,(i+1));
  1821. end;
  1822. end;
  1823. function FindChild(
  1824. const ACodePoint : Cardinal;
  1825. const AParent : PUCA_PropItemRec
  1826. ) : PUCA_PropItemRec;inline;
  1827. var
  1828. k : Integer;
  1829. begin
  1830. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  1831. for k := 0 to AParent^.ChildCount - 1 do begin
  1832. if (ACodePoint = Result^.CodePoint) then
  1833. exit;
  1834. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  1835. end;
  1836. Result := nil;
  1837. end;
  1838. function ComputeSortKey(
  1839. const AString : UnicodeString;
  1840. const ACollation : PUCA_DataBook
  1841. ) : TUCASortKey;
  1842. begin
  1843. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  1844. end;
  1845. function ComputeRawSortKey(
  1846. const AStr : PUnicodeChar;
  1847. const ALength : SizeInt;
  1848. const ACollation : PUCA_DataBook
  1849. ) : TUCA_PropWeightsArray;
  1850. var
  1851. r : TUCA_PropWeightsArray;
  1852. ral {used length of "r"}: Integer;
  1853. rl {capacity of "r"} : Integer;
  1854. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1855. begin
  1856. if (rl < AMinGrow) then
  1857. rl := rl + AMinGrow
  1858. else
  1859. rl := 2 * rl;
  1860. SetLength(r,rl);
  1861. end;
  1862. var
  1863. i : Integer;
  1864. s : UnicodeString;
  1865. ps : PUnicodeChar;
  1866. cp : Cardinal;
  1867. cl : PUCA_DataBook;
  1868. pp : PUCA_PropItemRec;
  1869. ppLevel : Byte;
  1870. removedCharIndex : array of DWord;
  1871. removedCharIndexLength : DWord;
  1872. locHistory : array[0..24] of record
  1873. i : Integer;
  1874. cl : PUCA_DataBook;
  1875. pp : PUCA_PropItemRec;
  1876. ppLevel : Byte;
  1877. cp : Cardinal;
  1878. removedCharIndexLength : DWord;
  1879. end;
  1880. locHistoryTop : Integer;
  1881. suppressState : record
  1882. cl : PUCA_DataBook;
  1883. CharCount : Integer;
  1884. end;
  1885. LastKeyOwner : record
  1886. Length : Integer;
  1887. Chars : array[0..24] of UInt24;
  1888. end;
  1889. procedure SaveKeyOwner();
  1890. var
  1891. k : Integer;
  1892. kppLevel : Byte;
  1893. begin
  1894. k := 0;
  1895. kppLevel := High(Byte);
  1896. while (k <= locHistoryTop) do begin
  1897. if (kppLevel <> locHistory[k].ppLevel) then begin
  1898. LastKeyOwner.Chars[k] := locHistory[k].cp;
  1899. kppLevel := locHistory[k].ppLevel;
  1900. end;
  1901. k := k + 1;
  1902. end;
  1903. if (k = 0) or (kppLevel <> ppLevel) then begin
  1904. LastKeyOwner.Chars[k] := cp;
  1905. k := k + 1;
  1906. end;
  1907. LastKeyOwner.Length := k;
  1908. end;
  1909. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1910. begin
  1911. SaveKeyOwner();
  1912. if ((ral + AItem^.WeightLength) > rl) then
  1913. GrowKey(AItem^.WeightLength);
  1914. AItem^.GetWeightArray(@r[ral]);
  1915. ral := ral + AItem^.WeightLength;
  1916. end;
  1917. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  1918. begin
  1919. if ((ral + AItem^.WeightCount) > rl) then
  1920. GrowKey(AItem^.WeightCount);
  1921. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  1922. ral := ral + AItem^.WeightCount;
  1923. end;
  1924. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1925. begin
  1926. SaveKeyOwner();
  1927. if ((ral + 2) > rl) then
  1928. GrowKey();
  1929. DeriveWeight(ACodePoint,@r[ral]);
  1930. ral := ral + 2;
  1931. end;
  1932. procedure RecordDeletion();inline;
  1933. begin
  1934. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  1935. if (suppressState.cl = nil) or
  1936. (suppressState.CharCount > ppLevel)
  1937. then begin
  1938. suppressState.cl := cl;
  1939. suppressState.CharCount := ppLevel;
  1940. end;
  1941. end;
  1942. end;
  1943. procedure RecordStep();inline;
  1944. begin
  1945. Inc(locHistoryTop);
  1946. locHistory[locHistoryTop].i := i;
  1947. locHistory[locHistoryTop].cl := cl;
  1948. locHistory[locHistoryTop].pp := pp;
  1949. locHistory[locHistoryTop].ppLevel := ppLevel;
  1950. locHistory[locHistoryTop].cp := cp;
  1951. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1952. RecordDeletion();
  1953. end;
  1954. procedure ClearHistory();inline;
  1955. begin
  1956. locHistoryTop := -1;
  1957. end;
  1958. function HasHistory() : Boolean;inline;
  1959. begin
  1960. Result := (locHistoryTop >= 0);
  1961. end;
  1962. function GetHistoryLength() : Integer;inline;
  1963. begin
  1964. Result := (locHistoryTop + 1);
  1965. end;
  1966. procedure GoBack();inline;
  1967. begin
  1968. Assert(locHistoryTop >= 0);
  1969. i := locHistory[locHistoryTop].i;
  1970. cp := locHistory[locHistoryTop].cp;
  1971. cl := locHistory[locHistoryTop].cl;
  1972. pp := locHistory[locHistoryTop].pp;
  1973. ppLevel := locHistory[locHistoryTop].ppLevel;
  1974. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1975. ps := @s[i];
  1976. Dec(locHistoryTop);
  1977. end;
  1978. var
  1979. c : Integer;
  1980. lastUnblockedNonstarterCCC : Byte;
  1981. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1982. var
  1983. k : DWord;
  1984. pk : PUnicodeChar;
  1985. puk : PUC_Prop;
  1986. begin
  1987. k := AStartFrom;
  1988. if (k > c) then
  1989. exit(False);
  1990. if (removedCharIndexLength>0) and
  1991. (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0)
  1992. then begin
  1993. exit(False);
  1994. end;
  1995. {if (k = (i+1)) or
  1996. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1997. then
  1998. lastUnblockedNonstarterCCC := 0;}
  1999. pk := @s[k];
  2000. if UnicodeIsHighSurrogate(pk^) then begin
  2001. if (k = c) then
  2002. exit(False);
  2003. if UnicodeIsLowSurrogate(pk[1]) then
  2004. puk := GetProps(pk[0],pk[1])
  2005. else
  2006. puk := GetProps(Word(pk^));
  2007. end else begin
  2008. puk := GetProps(Word(pk^));
  2009. end;
  2010. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  2011. exit(False);
  2012. lastUnblockedNonstarterCCC := puk^.CCC;
  2013. Result := True;
  2014. end;
  2015. procedure RemoveChar(APos : Integer);inline;
  2016. begin
  2017. if (removedCharIndexLength >= Length(removedCharIndex)) then
  2018. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  2019. removedCharIndex[removedCharIndexLength] := APos;
  2020. Inc(removedCharIndexLength);
  2021. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  2022. if (removedCharIndexLength >= Length(removedCharIndex)) then
  2023. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  2024. removedCharIndex[removedCharIndexLength] := APos+1;
  2025. Inc(removedCharIndexLength);
  2026. end;
  2027. end;
  2028. procedure Inc_I();inline;
  2029. begin
  2030. if (removedCharIndexLength = 0) then begin
  2031. Inc(i);
  2032. Inc(ps);
  2033. exit;
  2034. end;
  2035. while True do begin
  2036. Inc(i);
  2037. Inc(ps);
  2038. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  2039. Break;
  2040. end;
  2041. end;
  2042. var
  2043. surrogateState : Boolean;
  2044. function MoveToNextChar() : Boolean;inline;
  2045. begin
  2046. Result := True;
  2047. if UnicodeIsHighSurrogate(ps[0]) then begin
  2048. if (i = c) then
  2049. exit(False);
  2050. if UnicodeIsLowSurrogate(ps[1]) then begin
  2051. surrogateState := True;
  2052. cp := ToUCS4(ps[0],ps[1]);
  2053. end else begin
  2054. surrogateState := False;
  2055. cp := Word(ps[0]);
  2056. end;
  2057. end else begin
  2058. surrogateState := False;
  2059. cp := Word(ps[0]);
  2060. end;
  2061. end;
  2062. procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
  2063. begin
  2064. cl := nil;
  2065. pp := nil;
  2066. ppLevel := 0;
  2067. if AClearSuppressInfo then begin
  2068. suppressState.cl := nil;
  2069. suppressState.CharCount := 0;
  2070. end;
  2071. end;
  2072. function FindPropUCA() : Boolean;
  2073. var
  2074. candidateCL : PUCA_DataBook;
  2075. begin
  2076. pp := nil;
  2077. if (cl = nil) then
  2078. candidateCL := ACollation
  2079. else
  2080. candidateCL := cl;
  2081. if surrogateState then begin
  2082. while (candidateCL <> nil) do begin
  2083. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  2084. if (pp <> nil) then
  2085. break;
  2086. candidateCL := candidateCL^.Base;
  2087. end;
  2088. end else begin
  2089. while (candidateCL <> nil) do begin
  2090. pp := GetPropUCA(ps[0],candidateCL);
  2091. if (pp <> nil) then
  2092. break;
  2093. candidateCL := candidateCL^.Base;
  2094. end;
  2095. end;
  2096. cl := candidateCL;
  2097. Result := (pp <> nil);
  2098. end;
  2099. procedure AddWeightsAndClear();inline;
  2100. var
  2101. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2102. begin
  2103. if (pp^.WeightLength > 0) then begin
  2104. AddWeights(pp);
  2105. end else
  2106. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2107. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2108. (ctxNode^.Data.WeightCount > 0)
  2109. then begin
  2110. AddContextWeights(@ctxNode^.Data);
  2111. end;
  2112. //AddWeights(pp);
  2113. ClearHistory();
  2114. ClearPP();
  2115. end;
  2116. procedure StartMatch();
  2117. procedure HandleLastChar();
  2118. var
  2119. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2120. begin
  2121. while True do begin
  2122. if pp^.IsValid() then begin
  2123. if (pp^.WeightLength > 0) then
  2124. AddWeights(pp)
  2125. else
  2126. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2127. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2128. (ctxNode^.Data.WeightCount > 0)
  2129. then
  2130. AddContextWeights(@ctxNode^.Data)
  2131. else
  2132. AddComputedWeights(cp){handle deletion of code point};
  2133. break;
  2134. end;
  2135. if (cl^.Base = nil) then begin
  2136. AddComputedWeights(cp);
  2137. break;
  2138. end;
  2139. cl := cl^.Base;
  2140. if not FindPropUCA() then begin
  2141. AddComputedWeights(cp);
  2142. break;
  2143. end;
  2144. end;
  2145. end;
  2146. var
  2147. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2148. begin
  2149. ppLevel := 0;
  2150. if not FindPropUCA() then begin
  2151. AddComputedWeights(cp);
  2152. ClearHistory();
  2153. ClearPP();
  2154. end else begin
  2155. if (i = c) then begin
  2156. HandleLastChar();
  2157. end else begin
  2158. if pp^.IsValid()then begin
  2159. if (pp^.ChildCount = 0) then begin
  2160. if (pp^.WeightLength > 0) then
  2161. AddWeights(pp)
  2162. else
  2163. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2164. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  2165. (tmpCtxNode^.Data.WeightCount > 0)
  2166. then
  2167. AddContextWeights(@tmpCtxNode^.Data)
  2168. else
  2169. AddComputedWeights(cp){handle deletion of code point};
  2170. ClearPP();
  2171. ClearHistory();
  2172. end else begin
  2173. RecordStep();
  2174. end
  2175. end else begin
  2176. if (pp^.ChildCount = 0) then begin
  2177. AddComputedWeights(cp);
  2178. ClearPP();
  2179. ClearHistory();
  2180. end else begin
  2181. RecordStep();
  2182. end;
  2183. end ;
  2184. end;
  2185. end;
  2186. end;
  2187. function TryPermutation() : Boolean;
  2188. var
  2189. kk : Integer;
  2190. b : Boolean;
  2191. puk : PUC_Prop;
  2192. ppk : PUCA_PropItemRec;
  2193. begin
  2194. Result := False;
  2195. puk := GetProps(cp);
  2196. if (puk^.CCC = 0) then
  2197. exit;
  2198. lastUnblockedNonstarterCCC := puk^.CCC;
  2199. if surrogateState then
  2200. kk := i + 2
  2201. else
  2202. kk := i + 1;
  2203. while IsUnblockedNonstarter(kk) do begin
  2204. b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
  2205. if b then
  2206. ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
  2207. else
  2208. ppk := FindChild(Word(s[kk]),pp);
  2209. if (ppk <> nil) then begin
  2210. pp := ppk;
  2211. RemoveChar(kk);
  2212. Inc(ppLevel);
  2213. RecordStep();
  2214. Result := True;
  2215. if (pp^.ChildCount = 0 ) then
  2216. Break;
  2217. end;
  2218. if b then
  2219. Inc(kk);
  2220. Inc(kk);
  2221. end;
  2222. end;
  2223. procedure AdvanceCharPos();inline;
  2224. begin
  2225. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  2226. Inc(i);
  2227. Inc(ps);
  2228. end;
  2229. Inc_I();
  2230. end;
  2231. var
  2232. ok : Boolean;
  2233. pp1 : PUCA_PropItemRec;
  2234. cltemp : PUCA_DataBook;
  2235. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2236. begin
  2237. if (ALength = 0) then
  2238. exit(nil);
  2239. c := ALength;
  2240. s := NormalizeNFD(AStr,c);
  2241. c := Length(s);
  2242. rl := 3*c;
  2243. SetLength(r,rl);
  2244. ral := 0;
  2245. ps := @s[1];
  2246. ClearPP();
  2247. locHistoryTop := -1;
  2248. removedCharIndexLength := 0;
  2249. FillByte(suppressState,SizeOf(suppressState),0);
  2250. LastKeyOwner.Length := 0;
  2251. i := 1;
  2252. while (i <= c) and MoveToNextChar() do begin
  2253. if (pp = nil) then begin // Start Matching
  2254. StartMatch();
  2255. end else begin
  2256. pp1 := FindChild(cp,pp);
  2257. if (pp1 <> nil) then begin
  2258. Inc(ppLevel);
  2259. pp := pp1;
  2260. if (pp^.ChildCount = 0) or (i = c) then begin
  2261. ok := False;
  2262. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2263. if (pp^.WeightLength > 0) then begin
  2264. AddWeightsAndClear();
  2265. ok := True;
  2266. end else
  2267. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2268. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2269. (ctxNode^.Data.WeightCount > 0)
  2270. then begin
  2271. AddContextWeights(@ctxNode^.Data);
  2272. ClearHistory();
  2273. ClearPP();
  2274. ok := True;
  2275. end
  2276. end;
  2277. if not ok then begin
  2278. RecordDeletion();
  2279. ok := False;
  2280. while HasHistory() do begin
  2281. GoBack();
  2282. if pp^.IsValid() and
  2283. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2284. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2285. )
  2286. then begin
  2287. AddWeightsAndClear();
  2288. ok := True;
  2289. Break;
  2290. end;
  2291. end;
  2292. if not ok then begin
  2293. cltemp := cl^.Base;
  2294. if (cltemp <> nil) then begin
  2295. ClearPP(False);
  2296. cl := cltemp;
  2297. Continue;
  2298. end;
  2299. end;
  2300. if not ok then begin
  2301. AddComputedWeights(cp);
  2302. ClearHistory();
  2303. ClearPP();
  2304. end;
  2305. end;
  2306. end else begin
  2307. RecordStep();
  2308. end;
  2309. end else begin
  2310. // permutations !
  2311. ok := False;
  2312. if TryPermutation() and pp^.IsValid() then begin
  2313. if (suppressState.CharCount = 0) then begin
  2314. AddWeightsAndClear();
  2315. Continue;
  2316. end;
  2317. while True do begin
  2318. if pp^.IsValid() and
  2319. (pp^.WeightLength > 0) and
  2320. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2321. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2322. )
  2323. then begin
  2324. AddWeightsAndClear();
  2325. ok := True;
  2326. break;
  2327. end;
  2328. if not HasHistory() then
  2329. break;
  2330. GoBack();
  2331. if (pp = nil) then
  2332. break;
  2333. end;
  2334. end;
  2335. if not ok then begin
  2336. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2337. if (pp^.WeightLength > 0) then begin
  2338. AddWeightsAndClear();
  2339. ok := True;
  2340. end else
  2341. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2342. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2343. (ctxNode^.Data.WeightCount > 0)
  2344. then begin
  2345. AddContextWeights(@ctxNode^.Data);
  2346. ClearHistory();
  2347. ClearPP();
  2348. ok := True;
  2349. end
  2350. end;
  2351. if ok then
  2352. Continue;
  2353. end;
  2354. if not ok then begin
  2355. if (cl^.Base <> nil) then begin
  2356. cltemp := cl^.Base;
  2357. while HasHistory() do
  2358. GoBack();
  2359. pp := nil;
  2360. ppLevel := 0;
  2361. cl := cltemp;
  2362. Continue;
  2363. end;
  2364. //walk back
  2365. ok := False;
  2366. while HasHistory() do begin
  2367. GoBack();
  2368. if pp^.IsValid() and
  2369. (pp^.WeightLength > 0) and
  2370. ( (suppressState.CharCount = 0) or
  2371. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2372. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2373. )
  2374. )
  2375. then begin
  2376. AddWeightsAndClear();
  2377. ok := True;
  2378. Break;
  2379. end;
  2380. end;
  2381. if ok then begin
  2382. AdvanceCharPos();
  2383. Continue;
  2384. end;
  2385. if (pp <> nil) then begin
  2386. AddComputedWeights(cp);
  2387. ClearHistory();
  2388. ClearPP();
  2389. end;
  2390. end;
  2391. end;
  2392. end;
  2393. if surrogateState then begin
  2394. Inc(ps);
  2395. Inc(i);
  2396. end;
  2397. //
  2398. Inc_I();
  2399. end;
  2400. SetLength(r,ral);
  2401. Result := r;
  2402. end;
  2403. type
  2404. TComputeKeyContext = record
  2405. Collation : PUCA_DataBook;
  2406. r : TUCA_PropWeightsArray;
  2407. ral {used length of "r"}: Integer;
  2408. rl {capacity of "r"} : Integer;
  2409. i : Integer;
  2410. s : UnicodeString;
  2411. ps : PUnicodeChar;
  2412. cp : Cardinal;
  2413. cl : PUCA_DataBook;
  2414. pp : PUCA_PropItemRec;
  2415. ppLevel : Byte;
  2416. removedCharIndex : array of DWord;
  2417. removedCharIndexLength : DWord;
  2418. locHistoryTop : Integer;
  2419. locHistory : array[0..24] of record
  2420. i : Integer;
  2421. cl : PUCA_DataBook;
  2422. pp : PUCA_PropItemRec;
  2423. ppLevel : Byte;
  2424. cp : Cardinal;
  2425. removedCharIndexLength : DWord;
  2426. end;
  2427. suppressState : record
  2428. cl : PUCA_DataBook;
  2429. CharCount : Integer;
  2430. end;
  2431. LastKeyOwner : record
  2432. Length : Integer;
  2433. Chars : array[0..24] of UInt24;
  2434. end;
  2435. c : Integer;
  2436. lastUnblockedNonstarterCCC : Byte;
  2437. surrogateState : Boolean;
  2438. Finished : Boolean;
  2439. end;
  2440. PComputeKeyContext = ^TComputeKeyContext;
  2441. procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
  2442. begin
  2443. AContext^.cl := nil;
  2444. AContext^.pp := nil;
  2445. AContext^.ppLevel := 0;
  2446. if AClearSuppressInfo then begin
  2447. AContext^.suppressState.cl := nil;
  2448. AContext^.suppressState.CharCount := 0;
  2449. end;
  2450. end;
  2451. procedure InitContext(
  2452. AContext : PComputeKeyContext;
  2453. const AStr : PUnicodeChar;
  2454. const ALength : SizeInt;
  2455. const ACollation : PUCA_DataBook
  2456. );
  2457. begin
  2458. AContext^.Collation := ACollation;
  2459. AContext^.c := ALength;
  2460. AContext^.s := NormalizeNFD(AStr,AContext^.c);
  2461. AContext^.c := Length(AContext^.s);
  2462. AContext^.rl := 3*AContext^.c;
  2463. SetLength(AContext^.r,AContext^.rl);
  2464. AContext^.ral := 0;
  2465. AContext^.ps := @AContext^.s[1];
  2466. ClearPP(AContext);
  2467. AContext^.locHistoryTop := -1;
  2468. AContext^.removedCharIndexLength := 0;
  2469. FillByte(AContext^.suppressState,SizeOf(AContext^.suppressState),0);
  2470. AContext^.LastKeyOwner.Length := 0;
  2471. AContext^.i := 1;
  2472. AContext^.Finished := False;
  2473. end;
  2474. function FormKey(
  2475. const AWeightArray : TUCA_PropWeightsArray;
  2476. const ACollation : PUCA_DataBook
  2477. ) : TUCASortKey;inline;
  2478. begin
  2479. case ACollation.VariableWeight of
  2480. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
  2481. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
  2482. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
  2483. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
  2484. else
  2485. Result := FormKeyShifted(AWeightArray,ACollation);
  2486. end;
  2487. end;
  2488. function ComputeRawSortKeyNextItem(
  2489. const AContext : PComputeKeyContext
  2490. ) : Boolean;forward;
  2491. function IncrementalCompareString_NonIgnorable(
  2492. const AStrA : PUnicodeChar;
  2493. const ALengthA : SizeInt;
  2494. const AStrB : PUnicodeChar;
  2495. const ALengthB : SizeInt;
  2496. const ACollation : PUCA_DataBook
  2497. ) : Integer;
  2498. var
  2499. ctxA, ctxB : TComputeKeyContext;
  2500. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  2501. keyIndexB : Integer;
  2502. keyA, keyB : TUCASortKey;
  2503. begin
  2504. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  2505. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  2506. (ALengthA = ALengthB)
  2507. )
  2508. then
  2509. exit(0);
  2510. if (ALengthA = 0) then
  2511. exit(-1);
  2512. if (ALengthB = 0) then
  2513. exit(1);
  2514. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  2515. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  2516. lastKeyIndexA := -1;
  2517. keyIndexA := -1;
  2518. lengthMaxA := 0;
  2519. keyIndexB := -1;
  2520. while True do begin
  2521. if not ComputeRawSortKeyNextItem(@ctxA) then
  2522. Break;
  2523. if (ctxA.ral = lengthMaxA) then
  2524. Continue;
  2525. lengthMaxA := ctxA.ral;
  2526. keyIndexA := lastKeyIndexA + 1;
  2527. while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
  2528. Inc(keyIndexA);
  2529. end;
  2530. if (keyIndexA = lengthMaxA) then begin
  2531. lastKeyIndexA := keyIndexA-1;
  2532. Continue;
  2533. end;
  2534. while (keyIndexA < lengthMaxA) do begin
  2535. if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
  2536. Inc(keyIndexA);
  2537. Continue;
  2538. end;
  2539. Inc(keyIndexB);
  2540. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  2541. if (ctxB.ral <= keyIndexB) then begin
  2542. if not ComputeRawSortKeyNextItem(@ctxB) then
  2543. Break;
  2544. Continue;
  2545. end;
  2546. Inc(keyIndexB);
  2547. end;
  2548. if (ctxB.ral <= keyIndexB) then
  2549. exit(1);
  2550. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  2551. exit(1);
  2552. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  2553. exit(-1);
  2554. Inc(keyIndexA);
  2555. end;
  2556. lastKeyIndexA := keyIndexA - 1;
  2557. end;
  2558. //Key(A) is completed !
  2559. Inc(keyIndexB);
  2560. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  2561. if (ctxB.ral <= keyIndexB) then begin
  2562. if not ComputeRawSortKeyNextItem(@ctxB) then
  2563. Break;
  2564. Continue;
  2565. end;
  2566. Inc(keyIndexB);
  2567. end;
  2568. if (ctxB.ral > keyIndexB) then begin
  2569. //B has at least one more primary weight that A
  2570. exit(-1);
  2571. end;
  2572. while ComputeRawSortKeyNextItem(@ctxB) do begin
  2573. //
  2574. end;
  2575. //Key(B) is completed !
  2576. keyA := FormKey(ctxA.r,ctxA.Collation);
  2577. keyB := FormKey(ctxB.r,ctxB.Collation);
  2578. Result := CompareSortKey(keyA,keyB);
  2579. end;
  2580. function IncrementalCompareString_Shift(
  2581. const AStrA : PUnicodeChar;
  2582. const ALengthA : SizeInt;
  2583. const AStrB : PUnicodeChar;
  2584. const ALengthB : SizeInt;
  2585. const ACollation : PUCA_DataBook
  2586. ) : Integer;
  2587. var
  2588. ctxA, ctxB : TComputeKeyContext;
  2589. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  2590. keyIndexB : Integer;
  2591. keyA, keyB : TUCASortKey;
  2592. begin
  2593. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  2594. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  2595. (ALengthA = ALengthB)
  2596. )
  2597. then
  2598. exit(0);
  2599. if (ALengthA = 0) then
  2600. exit(-1);
  2601. if (ALengthB = 0) then
  2602. exit(1);
  2603. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  2604. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  2605. lastKeyIndexA := -1;
  2606. keyIndexA := -1;
  2607. lengthMaxA := 0;
  2608. keyIndexB := -1;
  2609. while True do begin
  2610. if not ComputeRawSortKeyNextItem(@ctxA) then
  2611. Break;
  2612. if (ctxA.ral = lengthMaxA) then
  2613. Continue;
  2614. lengthMaxA := ctxA.ral;
  2615. keyIndexA := lastKeyIndexA + 1;
  2616. while (keyIndexA < lengthMaxA) and
  2617. ( (ctxA.r[keyIndexA].Weights[0] = 0) or
  2618. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  2619. )
  2620. do begin
  2621. Inc(keyIndexA);
  2622. end;
  2623. if (keyIndexA = lengthMaxA) then begin
  2624. lastKeyIndexA := keyIndexA-1;
  2625. Continue;
  2626. end;
  2627. while (keyIndexA < lengthMaxA) do begin
  2628. if (ctxA.r[keyIndexA].Weights[0] = 0) or
  2629. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  2630. then begin
  2631. Inc(keyIndexA);
  2632. Continue;
  2633. end;
  2634. Inc(keyIndexB);
  2635. while (ctxB.ral <= keyIndexB) or
  2636. (ctxB.r[keyIndexB].Weights[0] = 0) or
  2637. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  2638. do begin
  2639. if (ctxB.ral <= keyIndexB) then begin
  2640. if not ComputeRawSortKeyNextItem(@ctxB) then
  2641. Break;
  2642. Continue;
  2643. end;
  2644. Inc(keyIndexB);
  2645. end;
  2646. if (ctxB.ral <= keyIndexB) then
  2647. exit(1);
  2648. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  2649. exit(1);
  2650. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  2651. exit(-1);
  2652. Inc(keyIndexA);
  2653. end;
  2654. lastKeyIndexA := keyIndexA - 1;
  2655. end;
  2656. //Key(A) is completed !
  2657. Inc(keyIndexB);
  2658. while (ctxB.ral <= keyIndexB) or
  2659. (ctxB.r[keyIndexB].Weights[0] = 0) or
  2660. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  2661. do begin
  2662. if (ctxB.ral <= keyIndexB) then begin
  2663. if not ComputeRawSortKeyNextItem(@ctxB) then
  2664. Break;
  2665. Continue;
  2666. end;
  2667. Inc(keyIndexB);
  2668. end;
  2669. if (ctxB.ral > keyIndexB) then begin
  2670. //B has at least one more primary weight that A
  2671. exit(-1);
  2672. end;
  2673. while ComputeRawSortKeyNextItem(@ctxB) do begin
  2674. //
  2675. end;
  2676. //Key(B) is completed !
  2677. keyA := FormKey(ctxA.r,ctxA.Collation);
  2678. keyB := FormKey(ctxB.r,ctxB.Collation);
  2679. Result := CompareSortKey(keyA,keyB);
  2680. end;
  2681. function IncrementalCompareString(
  2682. const AStrA : PUnicodeChar;
  2683. const ALengthA : SizeInt;
  2684. const AStrB : PUnicodeChar;
  2685. const ALengthB : SizeInt;
  2686. const ACollation : PUCA_DataBook
  2687. ) : Integer;
  2688. begin
  2689. case ACollation^.VariableWeight of
  2690. TUCA_VariableKind.ucaNonIgnorable :
  2691. begin
  2692. Result := IncrementalCompareString_NonIgnorable(
  2693. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2694. );
  2695. end;
  2696. TUCA_VariableKind.ucaBlanked,
  2697. TUCA_VariableKind.ucaShiftedTrimmed,
  2698. TUCA_VariableKind.ucaIgnoreSP,
  2699. TUCA_VariableKind.ucaShifted:
  2700. begin
  2701. Result := IncrementalCompareString_Shift(
  2702. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2703. );
  2704. end;
  2705. else
  2706. begin
  2707. Result := IncrementalCompareString_Shift(
  2708. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2709. );
  2710. end;
  2711. end;
  2712. end;
  2713. function IncrementalCompareString(
  2714. const AStrA,
  2715. AStrB : UnicodeString;
  2716. const ACollation : PUCA_DataBook
  2717. ) : Integer;
  2718. begin
  2719. Result := IncrementalCompareString(
  2720. Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
  2721. ACollation
  2722. );
  2723. end;
  2724. function ComputeRawSortKeyNextItem(
  2725. const AContext : PComputeKeyContext
  2726. ) : Boolean;
  2727. var
  2728. ctx : PComputeKeyContext;
  2729. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  2730. begin
  2731. if (ctx^.rl < AMinGrow) then
  2732. ctx^.rl := ctx^.rl + AMinGrow
  2733. else
  2734. ctx^.rl := 2 * ctx^.rl;
  2735. SetLength(ctx^.r,ctx^.rl);
  2736. end;
  2737. procedure SaveKeyOwner();
  2738. var
  2739. k : Integer;
  2740. kppLevel : Byte;
  2741. begin
  2742. k := 0;
  2743. kppLevel := High(Byte);
  2744. while (k <= ctx^.locHistoryTop) do begin
  2745. if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
  2746. ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
  2747. kppLevel := ctx^.locHistory[k].ppLevel;
  2748. end;
  2749. k := k + 1;
  2750. end;
  2751. if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
  2752. ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
  2753. k := k + 1;
  2754. end;
  2755. ctx^.LastKeyOwner.Length := k;
  2756. end;
  2757. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  2758. begin
  2759. SaveKeyOwner();
  2760. if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
  2761. GrowKey(AItem^.WeightLength);
  2762. AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
  2763. ctx^.ral := ctx^.ral + AItem^.WeightLength;
  2764. end;
  2765. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  2766. begin
  2767. if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
  2768. GrowKey(AItem^.WeightCount);
  2769. Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
  2770. ctx^.ral := ctx^.ral + AItem^.WeightCount;
  2771. end;
  2772. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  2773. begin
  2774. SaveKeyOwner();
  2775. if ((ctx^.ral + 2) > ctx^.rl) then
  2776. GrowKey();
  2777. DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
  2778. ctx^.ral := ctx^.ral + 2;
  2779. end;
  2780. procedure RecordDeletion();inline;
  2781. begin
  2782. if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  2783. if (ctx^.suppressState.cl = nil) or
  2784. (ctx^.suppressState.CharCount > ctx^.ppLevel)
  2785. then begin
  2786. ctx^.suppressState.cl := ctx^.cl;
  2787. ctx^.suppressState.CharCount := ctx^.ppLevel;
  2788. end;
  2789. end;
  2790. end;
  2791. procedure RecordStep();inline;
  2792. begin
  2793. Inc(ctx^.locHistoryTop);
  2794. ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
  2795. ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
  2796. ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
  2797. ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
  2798. ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
  2799. ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
  2800. RecordDeletion();
  2801. end;
  2802. procedure ClearHistory();inline;
  2803. begin
  2804. ctx^.locHistoryTop := -1;
  2805. end;
  2806. function HasHistory() : Boolean;inline;
  2807. begin
  2808. Result := (ctx^.locHistoryTop >= 0);
  2809. end;
  2810. function GetHistoryLength() : Integer;inline;
  2811. begin
  2812. Result := (ctx^.locHistoryTop + 1);
  2813. end;
  2814. procedure GoBack();inline;
  2815. begin
  2816. Assert(ctx^.locHistoryTop >= 0);
  2817. ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
  2818. ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
  2819. ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
  2820. ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
  2821. ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
  2822. ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
  2823. ctx^.ps := @ctx^.s[ctx^.i];
  2824. Dec(ctx^.locHistoryTop);
  2825. end;
  2826. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  2827. var
  2828. k : DWord;
  2829. pk : PUnicodeChar;
  2830. puk : PUC_Prop;
  2831. begin
  2832. k := AStartFrom;
  2833. if (k > ctx^.c) then
  2834. exit(False);
  2835. if (ctx^.removedCharIndexLength>0) and
  2836. (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,k) >= 0)
  2837. then begin
  2838. exit(False);
  2839. end;
  2840. {if (k = (i+1)) or
  2841. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  2842. then
  2843. lastUnblockedNonstarterCCC := 0;}
  2844. pk := @ctx^.s[k];
  2845. if UnicodeIsHighSurrogate(pk^) then begin
  2846. if (k = ctx^.c) then
  2847. exit(False);
  2848. if UnicodeIsLowSurrogate(pk[1]) then
  2849. puk := GetProps(pk[0],pk[1])
  2850. else
  2851. puk := GetProps(Word(pk^));
  2852. end else begin
  2853. puk := GetProps(Word(pk^));
  2854. end;
  2855. if (puk^.CCC = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.CCC) then
  2856. exit(False);
  2857. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  2858. Result := True;
  2859. end;
  2860. procedure RemoveChar(APos : Integer);inline;
  2861. begin
  2862. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  2863. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  2864. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
  2865. Inc(ctx^.removedCharIndexLength);
  2866. if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
  2867. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  2868. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  2869. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
  2870. Inc(ctx^.removedCharIndexLength);
  2871. end;
  2872. end;
  2873. procedure Inc_I();inline;
  2874. begin
  2875. if (ctx^.removedCharIndexLength = 0) then begin
  2876. Inc(ctx^.i);
  2877. Inc(ctx^.ps);
  2878. exit;
  2879. end;
  2880. while True do begin
  2881. Inc(ctx^.i);
  2882. Inc(ctx^.ps);
  2883. if (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,ctx^.i) = -1) then
  2884. Break;
  2885. end;
  2886. end;
  2887. function MoveToNextChar() : Boolean;inline;
  2888. begin
  2889. Result := True;
  2890. if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
  2891. if (ctx^.i = ctx^.c) then
  2892. exit(False);
  2893. if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  2894. ctx^.surrogateState := True;
  2895. ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
  2896. end else begin
  2897. ctx^.surrogateState := False;
  2898. ctx^.cp := Word(ctx^.ps[0]);
  2899. end;
  2900. end else begin
  2901. ctx^.surrogateState := False;
  2902. ctx^.cp := Word(ctx^.ps[0]);
  2903. end;
  2904. end;
  2905. function FindPropUCA() : Boolean;
  2906. var
  2907. candidateCL : PUCA_DataBook;
  2908. begin
  2909. ctx^.pp := nil;
  2910. if (ctx^.cl = nil) then
  2911. candidateCL := ctx^.Collation
  2912. else
  2913. candidateCL := ctx^.cl;
  2914. if ctx^.surrogateState then begin
  2915. while (candidateCL <> nil) do begin
  2916. ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
  2917. if (ctx^.pp <> nil) then
  2918. break;
  2919. candidateCL := candidateCL^.Base;
  2920. end;
  2921. end else begin
  2922. while (candidateCL <> nil) do begin
  2923. ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
  2924. if (ctx^.pp <> nil) then
  2925. break;
  2926. candidateCL := candidateCL^.Base;
  2927. end;
  2928. end;
  2929. ctx^.cl := candidateCL;
  2930. Result := (ctx^.pp <> nil);
  2931. end;
  2932. procedure AddWeightsAndClear();inline;
  2933. var
  2934. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2935. begin
  2936. if (ctx^.pp^.WeightLength > 0) then begin
  2937. AddWeights(ctx^.pp);
  2938. end else
  2939. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2940. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  2941. (ctxNode^.Data.WeightCount > 0)
  2942. then begin
  2943. AddContextWeights(@ctxNode^.Data);
  2944. end;
  2945. //AddWeights(pp);
  2946. ClearHistory();
  2947. ClearPP(ctx);
  2948. end;
  2949. function StartMatch() : Boolean;
  2950. procedure HandleLastChar();
  2951. var
  2952. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2953. begin
  2954. while True do begin
  2955. if ctx^.pp^.IsValid() then begin
  2956. if (ctx^.pp^.WeightLength > 0) then
  2957. AddWeights(ctx^.pp)
  2958. else
  2959. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2960. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  2961. (ctxNode^.Data.WeightCount > 0)
  2962. then
  2963. AddContextWeights(@ctxNode^.Data)
  2964. else
  2965. AddComputedWeights(ctx^.cp){handle deletion of code point};
  2966. break;
  2967. end;
  2968. if (ctx^.cl^.Base = nil) then begin
  2969. AddComputedWeights(ctx^.cp);
  2970. break;
  2971. end;
  2972. ctx^.cl := ctx^.cl^.Base;
  2973. if not FindPropUCA() then begin
  2974. AddComputedWeights(ctx^.cp);
  2975. break;
  2976. end;
  2977. end;
  2978. end;
  2979. var
  2980. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2981. begin
  2982. Result := False;
  2983. ctx^.ppLevel := 0;
  2984. if not FindPropUCA() then begin
  2985. AddComputedWeights(ctx^.cp);
  2986. ClearHistory();
  2987. ClearPP(ctx);
  2988. Result := True;
  2989. end else begin
  2990. if (ctx^.i = ctx^.c) then begin
  2991. HandleLastChar();
  2992. Result := True;
  2993. end else begin
  2994. if ctx^.pp^.IsValid()then begin
  2995. if (ctx^.pp^.ChildCount = 0) then begin
  2996. if (ctx^.pp^.WeightLength > 0) then
  2997. AddWeights(ctx^.pp)
  2998. else
  2999. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3000. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
  3001. (tmpCtxNode^.Data.WeightCount > 0)
  3002. then
  3003. AddContextWeights(@tmpCtxNode^.Data)
  3004. else
  3005. AddComputedWeights(ctx^.cp){handle deletion of code point};
  3006. ClearPP(ctx);
  3007. ClearHistory();
  3008. Result := True;
  3009. end else begin
  3010. RecordStep();
  3011. end
  3012. end else begin
  3013. if (ctx^.pp^.ChildCount = 0) then begin
  3014. AddComputedWeights(ctx^.cp);
  3015. ClearPP(ctx);
  3016. ClearHistory();
  3017. Result := True;
  3018. end else begin
  3019. RecordStep();
  3020. end;
  3021. end;
  3022. end;
  3023. end;
  3024. end;
  3025. function TryPermutation() : Boolean;
  3026. var
  3027. kk : Integer;
  3028. b : Boolean;
  3029. puk : PUC_Prop;
  3030. ppk : PUCA_PropItemRec;
  3031. begin
  3032. Result := False;
  3033. puk := GetProps(ctx^.cp);
  3034. if (puk^.CCC = 0) then
  3035. exit;
  3036. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  3037. if ctx^.surrogateState then
  3038. kk := ctx^.i + 2
  3039. else
  3040. kk := ctx^.i + 1;
  3041. while IsUnblockedNonstarter(kk) do begin
  3042. b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
  3043. if b then
  3044. ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
  3045. else
  3046. ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
  3047. if (ppk <> nil) then begin
  3048. ctx^.pp := ppk;
  3049. RemoveChar(kk);
  3050. Inc(ctx^.ppLevel);
  3051. RecordStep();
  3052. Result := True;
  3053. if (ctx^.pp^.ChildCount = 0 ) then
  3054. Break;
  3055. end;
  3056. if b then
  3057. Inc(kk);
  3058. Inc(kk);
  3059. end;
  3060. end;
  3061. procedure AdvanceCharPos();inline;
  3062. begin
  3063. if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  3064. Inc(ctx^.i);
  3065. Inc(ctx^.ps);
  3066. end;
  3067. Inc_I();
  3068. end;
  3069. var
  3070. ok : Boolean;
  3071. pp1 : PUCA_PropItemRec;
  3072. cltemp : PUCA_DataBook;
  3073. ctxNode : PUCA_PropItemContextTreeNodeRec;
  3074. begin
  3075. if AContext^.Finished then
  3076. exit(False);
  3077. ctx := AContext;
  3078. while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
  3079. ok := False;
  3080. if (ctx^.pp = nil) then begin // Start Matching
  3081. ok := StartMatch();
  3082. end else begin
  3083. pp1 := FindChild(ctx^.cp,ctx^.pp);
  3084. if (pp1 <> nil) then begin
  3085. Inc(ctx^.ppLevel);
  3086. ctx^.pp := pp1;
  3087. if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
  3088. ok := False;
  3089. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3090. if (ctx^.pp^.WeightLength > 0) then begin
  3091. AddWeightsAndClear();
  3092. ok := True;
  3093. end else
  3094. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3095. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3096. (ctxNode^.Data.WeightCount > 0)
  3097. then begin
  3098. AddContextWeights(@ctxNode^.Data);
  3099. ClearHistory();
  3100. ClearPP(ctx);
  3101. ok := True;
  3102. end
  3103. end;
  3104. if not ok then begin
  3105. RecordDeletion();
  3106. while HasHistory() do begin
  3107. GoBack();
  3108. if ctx^.pp^.IsValid() and
  3109. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3110. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3111. )
  3112. then begin
  3113. AddWeightsAndClear();
  3114. ok := True;
  3115. Break;
  3116. end;
  3117. end;
  3118. if not ok then begin
  3119. cltemp := ctx^.cl^.Base;
  3120. if (cltemp <> nil) then begin
  3121. ClearPP(ctx,False);
  3122. ctx^.cl := cltemp;
  3123. Continue;
  3124. end;
  3125. end;
  3126. if not ok then begin
  3127. AddComputedWeights(ctx^.cp);
  3128. ClearHistory();
  3129. ClearPP(ctx);
  3130. ok := True;
  3131. end;
  3132. end;
  3133. end else begin
  3134. RecordStep();
  3135. end;
  3136. end else begin
  3137. // permutations !
  3138. ok := False;
  3139. if TryPermutation() and ctx^.pp^.IsValid() then begin
  3140. if (ctx^.suppressState.CharCount = 0) then begin
  3141. AddWeightsAndClear();
  3142. ok := True;
  3143. exit(True);// Continue;
  3144. end;
  3145. while True do begin
  3146. if ctx^.pp^.IsValid() and
  3147. (ctx^.pp^.WeightLength > 0) and
  3148. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3149. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3150. )
  3151. then begin
  3152. AddWeightsAndClear();
  3153. ok := True;
  3154. break;
  3155. end;
  3156. if not HasHistory() then
  3157. break;
  3158. GoBack();
  3159. if (ctx^.pp = nil) then
  3160. break;
  3161. end;
  3162. end;
  3163. if not ok then begin
  3164. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3165. if (ctx^.pp^.WeightLength > 0) then begin
  3166. AddWeightsAndClear();
  3167. ok := True;
  3168. end else
  3169. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3170. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3171. (ctxNode^.Data.WeightCount > 0)
  3172. then begin
  3173. AddContextWeights(@ctxNode^.Data);
  3174. ClearHistory();
  3175. ClearPP(ctx);
  3176. ok := True;
  3177. end
  3178. end;
  3179. if ok then
  3180. exit(True);// Continue;
  3181. end;
  3182. if not ok then begin
  3183. if (ctx^.cl^.Base <> nil) then begin
  3184. cltemp := ctx^.cl^.Base;
  3185. while HasHistory() do
  3186. GoBack();
  3187. ctx^.pp := nil;
  3188. ctx^.ppLevel := 0;
  3189. ctx^.cl := cltemp;
  3190. Continue;
  3191. end;
  3192. //walk back
  3193. ok := False;
  3194. while HasHistory() do begin
  3195. GoBack();
  3196. if ctx^.pp^.IsValid() and
  3197. (ctx^.pp^.WeightLength > 0) and
  3198. ( (ctx^.suppressState.CharCount = 0) or
  3199. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3200. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3201. )
  3202. )
  3203. then begin
  3204. AddWeightsAndClear();
  3205. ok := True;
  3206. Break;
  3207. end;
  3208. end;
  3209. if ok then begin
  3210. AdvanceCharPos();
  3211. exit(True);// Continue;
  3212. end;
  3213. if (ctx^.pp <> nil) then begin
  3214. AddComputedWeights(ctx^.cp);
  3215. ClearHistory();
  3216. ClearPP(ctx);
  3217. ok := True;
  3218. end;
  3219. end;
  3220. end;
  3221. end;
  3222. if ctx^.surrogateState then begin
  3223. Inc(ctx^.ps);
  3224. Inc(ctx^.i);
  3225. end;
  3226. //
  3227. Inc_I();
  3228. if ok then
  3229. exit(True);
  3230. end;
  3231. SetLength(ctx^.r,ctx^.ral);
  3232. ctx^.Finished := True;
  3233. Result := True;
  3234. end;
  3235. function ComputeSortKey(
  3236. const AStr : PUnicodeChar;
  3237. const ALength : SizeInt;
  3238. const ACollation : PUCA_DataBook
  3239. ) : TUCASortKey;
  3240. var
  3241. r : TUCA_PropWeightsArray;
  3242. begin
  3243. r := ComputeRawSortKey(AStr,ALength,ACollation);
  3244. Result := FormKey(r,ACollation);
  3245. end;
  3246. end.