unicodedata.pas 114 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. -------------------------------------------------------------------------------
  16. Overview of the Unicode Collation Algorithm(UCA) data layout :
  17. ============================================================
  18. The UCA data(see “TUCA_DataBook”) are organized into index data
  19. (see the “TUCA_DataBook” fields “BMP_Table1”, “BMP_Table2”,
  20. “OBMP_Table1” and “OBMP_Table2”) and actual properties data(see
  21. the “Props” field of “TUCA_DataBook”). The index is a 3 level
  22. tables designed to minimize the overhaul data size. The
  23. properties’ data contain the actual (used) UCA’s properties
  24. for the customized code points(or sequence of code points)
  25. data (see TUCA_PropItemRec).
  26. To get the properties’ record of a code point, one goes
  27. through the index data to get its offset into the “Props”
  28. serialized data, see the “GetPropUCA” procedure.
  29. The “TUCA_PropItemRec” record, that represents the actual
  30. properties, contains a fixed part and a variable part. The
  31. fixed part is directly expressed as fields of the record :
  32. “WeightLength”, “ChildCount”, “Size”, “Flags”. The
  33. variable part depends on some values of the fixed part; For
  34. example “WeightLength” specify the number of weight[1] item,
  35. it can be zero or not null; The “Flags” fields does contains
  36. some bit states to indicate for example if the record’s owner,
  37. that is the target code point, is present(it is not always
  38. necessary to store the code point as you are required to have
  39. this information in the first place in order to get the
  40. “TUCA_PropItemRec” record).
  41. The data, as it is organized now, is as follow for each code point :
  42. * the fixed part is serialized,
  43. * if there are weight item array, they are serialized
  44. (see the "WeigthLength")
  45. * the code point is serialized (if needed)
  46. * the context[2] array is serialized
  47. * The children[3] record are serialized.
  48. The “Size” represent the size of the whole record, including its
  49. children records(see [3]). The “GetSelfOnlySize” returns the size
  50. of the queried record, excluding the size of its children.
  51. Notes :
  52. [1] : A weight item is an array of 3 words. A code point/sequence of code
  53. point may have zero or multiple items.
  54. [2] : There are characters(mostly japanese ones) that do not have their
  55. own weighs; There inherit the weights of the preceding character
  56. in the string that you will be evaluating.
  57. [3] : Some unicode characters are expressed using more than one code point.
  58. In that case the properties records are serialized as a trie. The
  59. trie data structure is useful when many characters’ expression have
  60. the same starting code point(s).
  61. [4] TUCA_PropItemRec serialization :
  62. TUCA_PropItemRec :
  63. WeightLength, ChildCount, Size, Flags [weight item array]
  64. [Code Point] [Context data]
  65. [Child 0] [Child 1] .. [Child n]
  66. each [Child k] is a TUCA_PropItemRec.
  67. }
  68. unit unicodedata;
  69. {$IFDEF FPC}
  70. {$mode delphi}
  71. {$H+}
  72. {$PACKENUM 1}
  73. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  74. {$DEFINE HAS_PUSH}
  75. {$DEFINE HAS_COMPARE_BYTE}
  76. {$DEFINE INLINE_SUPPORT_PRIVATE_VARS}
  77. {$DEFINE HAS_UNALIGNED}
  78. {$ENDIF FPC}
  79. {$IFNDEF FPC}
  80. {$UNDEF HAS_COMPARE_BYTE}
  81. {$UNDEF HAS_PUSH}
  82. {$DEFINE ENDIAN_LITTLE}
  83. {$ENDIF !FPC}
  84. {$SCOPEDENUMS ON}
  85. {$pointermath on}
  86. {$define USE_INLINE}
  87. { $define uni_debug}
  88. interface
  89. {$IFNDEF FPC}
  90. type
  91. UnicodeChar = WideChar;
  92. PUnicodeChar = ^UnicodeChar;
  93. SizeInt = NativeInt;
  94. DWord = UInt32;
  95. PDWord = ^DWord;
  96. PtrInt = NativeInt;
  97. PtrUInt = NativeUInt;
  98. {$ENDIF !FPC}
  99. {$IF not Declared(reCodesetConversion)}
  100. const reCodesetConversion = reRangeError;
  101. {$IFEND reCodesetConversion}
  102. {$IF not Declared(DirectorySeparator)}
  103. {$IFDEF MSWINDOWS}
  104. const DirectorySeparator = '\';
  105. {$ELSE}
  106. const DirectorySeparator = '/';
  107. {$ENDIF MSWINDOWS}
  108. {$IFEND DirectorySeparator}
  109. const
  110. MAX_WORD = High(Word);
  111. LOW_SURROGATE_BEGIN = Word($DC00);
  112. LOW_SURROGATE_END = Word($DFFF);
  113. HIGH_SURROGATE_BEGIN = Word($D800);
  114. HIGH_SURROGATE_END = Word($DBFF);
  115. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  116. UCS4_HALF_BASE = LongWord($10000);
  117. UCS4_HALF_MASK = Word($3FF);
  118. MAX_LEGAL_UTF32 = $10FFFF;
  119. const
  120. // Unicode General Category
  121. UGC_UppercaseLetter = 0;
  122. UGC_LowercaseLetter = 1;
  123. UGC_TitlecaseLetter = 2;
  124. UGC_ModifierLetter = 3;
  125. UGC_OtherLetter = 4;
  126. UGC_NonSpacingMark = 5;
  127. UGC_CombiningMark = 6;
  128. UGC_EnclosingMark = 7;
  129. UGC_DecimalNumber = 8;
  130. UGC_LetterNumber = 9;
  131. UGC_OtherNumber = 10;
  132. UGC_ConnectPunctuation = 11;
  133. UGC_DashPunctuation = 12;
  134. UGC_OpenPunctuation = 13;
  135. UGC_ClosePunctuation = 14;
  136. UGC_InitialPunctuation = 15;
  137. UGC_FinalPunctuation = 16;
  138. UGC_OtherPunctuation = 17;
  139. UGC_MathSymbol = 18;
  140. UGC_CurrencySymbol = 19;
  141. UGC_ModifierSymbol = 20;
  142. UGC_OtherSymbol = 21;
  143. UGC_SpaceSeparator = 22;
  144. UGC_LineSeparator = 23;
  145. UGC_ParagraphSeparator = 24;
  146. UGC_Control = 25;
  147. UGC_Format = 26;
  148. UGC_Surrogate = 27;
  149. UGC_PrivateUse = 28;
  150. UGC_Unassigned = 29;
  151. // Names
  152. UnicodeCategoryNames: array[0..29] of string[2] = (
  153. 'Lu',
  154. 'Ll',
  155. 'Lt',
  156. 'Lm',
  157. 'Lo',
  158. 'Mn',
  159. 'Mc',
  160. 'Me',
  161. 'Nd',
  162. 'Nl',
  163. 'No',
  164. 'Pc',
  165. 'Pd',
  166. 'Ps',
  167. 'Pe',
  168. 'Pi',
  169. 'Pf',
  170. 'Po',
  171. 'Sm',
  172. 'Sc',
  173. 'Sk',
  174. 'So',
  175. 'Zs',
  176. 'Zl',
  177. 'Zp',
  178. 'Cc',
  179. 'Cf',
  180. 'Cs',
  181. 'Co',
  182. 'Cn'
  183. );
  184. type
  185. TUInt24Rec = packed record
  186. public
  187. {$ifdef ENDIAN_LITTLE}
  188. byte0, byte1, byte2 : Byte;
  189. {$else ENDIAN_LITTLE}
  190. byte2, byte1, byte0 : Byte;
  191. {$endif ENDIAN_LITTLE}
  192. public
  193. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  194. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  195. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  196. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  197. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  198. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  199. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  200. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  201. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  202. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  203. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  204. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  205. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  206. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  207. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  208. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  209. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  210. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  211. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  212. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  213. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  214. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  215. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  216. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  217. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  218. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  219. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  220. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  221. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  222. end;
  223. UInt24 = TUInt24Rec;
  224. PUInt24 = ^UInt24;
  225. const
  226. ZERO_UINT24 : UInt24 =
  227. {$ifdef ENDIAN_LITTLE}
  228. (byte0 : 0; byte1 : 0; byte2 : 0;);
  229. {$else ENDIAN_LITTLE}
  230. (byte2 : 0; byte1 : 0; byte0 : 0;);
  231. {$endif ENDIAN_LITTLE}
  232. type
  233. PUC_Prop = ^TUC_Prop;
  234. { TUC_Prop }
  235. { On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
  236. This breaks intended layout of initialized constants/variables.
  237. A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
  238. but for now just declare this record as "unpacked". This causes bloat, but it's better than having
  239. entire unit not working at all. }
  240. TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
  241. private
  242. function GetCategory : Byte;inline;
  243. procedure SetCategory(AValue : Byte);
  244. function GetWhiteSpace : Boolean;inline;
  245. procedure SetWhiteSpace(AValue : Boolean);
  246. function GetHangulSyllable : Boolean;inline;
  247. procedure SetHangulSyllable(AValue : Boolean);
  248. function GetNumericValue: Double;inline;
  249. public
  250. CategoryData : Byte;
  251. public
  252. CCC : Byte;
  253. NumericIndex : Byte;
  254. SimpleUpperCase : UInt24;
  255. SimpleLowerCase : UInt24;
  256. DecompositionID : SmallInt;
  257. public
  258. property Category : Byte read GetCategory write SetCategory;
  259. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  260. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  261. property NumericValue : Double read GetNumericValue;
  262. end;
  263. type
  264. TUCA_PropWeights = packed record
  265. Weights : array[0..2] of Word;
  266. end;
  267. PUCA_PropWeights = ^TUCA_PropWeights;
  268. TUCA_PropItemContextRec = packed record
  269. public
  270. CodePointCount : Byte;
  271. WeightCount : Byte;
  272. public
  273. function GetCodePoints() : PUInt24;inline;
  274. function GetWeights() : PUCA_PropWeights;inline;
  275. end;
  276. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  277. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  278. TUCA_PropItemContextTreeNodeRec = packed record
  279. public
  280. Left : Word;
  281. Right : Word;
  282. Data : TUCA_PropItemContextRec;
  283. public
  284. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  285. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  286. end;
  287. { TUCA_PropItemContextTreeRec }
  288. TUCA_PropItemContextTreeRec = packed record
  289. public
  290. Size : UInt24;
  291. public
  292. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  293. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  294. function Find(
  295. const AChars : PUInt24;
  296. const ACharCount : Integer;
  297. out ANode : PUCA_PropItemContextTreeNodeRec
  298. ) : Boolean;
  299. end;
  300. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  301. { TUCA_PropItemRec }
  302. TUCA_PropItemRec = packed record
  303. private
  304. const FLAG_VALID = 0;
  305. const FLAG_CODEPOINT = 1;
  306. const FLAG_CONTEXTUAL = 2;
  307. const FLAG_DELETION = 3;
  308. const FLAG_COMPRESS_WEIGHT_1 = 6;
  309. const FLAG_COMPRESS_WEIGHT_2 = 7;
  310. private
  311. function GetCodePoint() : UInt24;inline;
  312. public
  313. WeightLength : Byte;
  314. ChildCount : Byte;
  315. Size : Word;
  316. Flags : Byte;
  317. public
  318. function HasCodePoint() : Boolean;inline;
  319. property CodePoint : UInt24 read GetCodePoint;
  320. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  321. function IsValid() : Boolean;inline;
  322. //function GetWeightArray() : PUCA_PropWeights;inline;
  323. procedure GetWeightArray(ADest : PUCA_PropWeights);
  324. function GetSelfOnlySize() : Cardinal;inline;
  325. function GetContextual() : Boolean;inline;
  326. property Contextual : Boolean read GetContextual;
  327. function GetContext() : PUCA_PropItemContextTreeRec;
  328. function IsDeleted() : Boolean;inline;
  329. function IsWeightCompress_1() : Boolean;inline;
  330. function IsWeightCompress_2() : Boolean;inline;
  331. end;
  332. PUCA_PropItemRec = ^TUCA_PropItemRec;
  333. TUCA_VariableKind = (
  334. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  335. ucaIgnoreSP // This one is not implemented !
  336. );
  337. TCollationName = array[0..(128-1)] of Byte;
  338. TCollationVersion = TCollationName;
  339. PUCA_DataBook = ^TUCA_DataBook;
  340. TUCA_DataBook = record
  341. public
  342. Base : PUCA_DataBook;
  343. Version : TCollationVersion;
  344. CollationName : TCollationName;
  345. VariableWeight : TUCA_VariableKind;
  346. Backwards : array[0..3] of Boolean;
  347. BMP_Table1 : PByte;
  348. BMP_Table2 : PUInt24;
  349. OBMP_Table1 : PWord;
  350. OBMP_Table2 : PUInt24;
  351. PropCount : Integer;
  352. Props : PUCA_PropItemRec;
  353. VariableLowLimit : Word;
  354. VariableHighLimit : Word;
  355. NoNormalization : Boolean;
  356. ComparisonStrength : Byte;
  357. Dynamic : Boolean;
  358. public
  359. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  360. end;
  361. TUnicodeStringArray = array of UnicodeString;
  362. TCollationTableItem = record
  363. Collation : PUCA_DataBook;
  364. Aliases : TUnicodeStringArray;
  365. end;
  366. PCollationTableItem = ^TCollationTableItem;
  367. TCollationTableItemArray = array of TCollationTableItem;
  368. { TCollationTable }
  369. TCollationTable = record
  370. private
  371. FItems : TCollationTableItemArray;
  372. FCount : Integer;
  373. private
  374. function GetCapacity : Integer;
  375. function GetCount : Integer;
  376. function GetItem(const AIndex : Integer) : PCollationTableItem;
  377. procedure Grow();
  378. procedure ClearItem(AItem : PCollationTableItem);
  379. procedure AddAlias(
  380. AItem : PCollationTableItem;
  381. AAlias : UnicodeString
  382. );overload;
  383. public
  384. class function NormalizeName(AName : UnicodeString) : UnicodeString;static;
  385. procedure Clear();
  386. function IndexOf(AName : UnicodeString) : Integer;overload;
  387. function IndexOf(ACollation : PUCA_DataBook) : Integer;overload;
  388. function Find(AName : UnicodeString) : PCollationTableItem;overload;
  389. function Find(ACollation : PUCA_DataBook) : PCollationTableItem;overload;
  390. function Add(ACollation : PUCA_DataBook) : Integer;
  391. function AddAlias(AName, AAlias : UnicodeString) : Boolean;overload;
  392. function Remove(AIndex : Integer) : PUCA_DataBook;
  393. property Item[const AIndex : Integer] : PCollationTableItem read GetItem;default;
  394. property Count : Integer read GetCount;
  395. property Capacity : Integer read GetCapacity;
  396. end;
  397. TCollationField = (
  398. BackWard, VariableLowLimit, VariableHighLimit, Alternate, Normalization,
  399. Strength
  400. );
  401. TCollationFields = set of TCollationField;
  402. const
  403. ROOT_COLLATION_NAME = 'DUCET';
  404. ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
  405. procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
  406. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  407. function UnicodeIsSurrogatePair(
  408. const AHighSurrogate,
  409. ALowSurrogate : UnicodeChar
  410. ) : Boolean;inline;
  411. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  412. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  413. function UnicodeToUpper(
  414. const AString : UnicodeString;
  415. const AIgnoreInvalidSequence : Boolean;
  416. out AResultString : UnicodeString
  417. ) : Integer;
  418. function UnicodeToLower(
  419. const AString : UnicodeString;
  420. const AIgnoreInvalidSequence : Boolean;
  421. out AResultString : UnicodeString
  422. ) : Integer;
  423. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  424. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  425. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  426. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  427. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  428. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  429. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  430. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  431. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  432. type
  433. TUCASortKeyItem = Word;
  434. TUCASortKey = array of TUCASortKeyItem;
  435. TCategoryMask = set of 0..31;
  436. const
  437. DEFAULT_UCA_COMPARISON_STRENGTH = 3;
  438. function ComputeSortKey(
  439. const AString : UnicodeString;
  440. const ACollation : PUCA_DataBook
  441. ) : TUCASortKey;inline;overload;
  442. function ComputeSortKey(
  443. const AStr : PUnicodeChar;
  444. const ALength : SizeInt;
  445. const ACollation : PUCA_DataBook
  446. ) : TUCASortKey;overload;
  447. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  448. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  449. function IncrementalCompareString(
  450. const AStrA : PUnicodeChar;
  451. const ALengthA : SizeInt;
  452. const AStrB : PUnicodeChar;
  453. const ALengthB : SizeInt;
  454. const ACollation : PUCA_DataBook
  455. ) : Integer;overload;
  456. function IncrementalCompareString(
  457. const AStrA,
  458. AStrB : UnicodeString;
  459. const ACollation : PUCA_DataBook
  460. ) : Integer;inline;overload;
  461. function FilterString(
  462. const AStr : PUnicodeChar;
  463. const ALength : SizeInt;
  464. const AExcludedMask : TCategoryMask
  465. ) : UnicodeString;overload;
  466. function FilterString(
  467. const AStr : UnicodeString;
  468. const AExcludedMask : TCategoryMask
  469. ) : UnicodeString;overload;inline;
  470. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
  471. function RegisterCollation(
  472. const ACollation : PUCA_DataBook;
  473. const AAliasList : array of UnicodeString
  474. ) : Boolean;overload;
  475. function RegisterCollation(
  476. ADirectory, ALanguage : UnicodeString
  477. ) : Boolean;overload;
  478. function AddAliasCollation(
  479. ACollation : PUCA_DataBook;
  480. AALias : UnicodeString
  481. ) : Boolean;
  482. function UnregisterCollation(AName : UnicodeString): Boolean;
  483. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  484. function FindCollation(AName : UnicodeString): PUCA_DataBook;overload;
  485. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  486. function GetCollationCount() : Integer;
  487. procedure PrepareCollation(
  488. ACollation : PUCA_DataBook;
  489. const ABaseName : UnicodeString;
  490. const AChangedFields : TCollationFields
  491. );
  492. function LoadCollation(
  493. const AData : Pointer;
  494. const ADataLength : Integer;
  495. var AAliases : TUnicodeStringArray
  496. ) : PUCA_DataBook;overload;
  497. function LoadCollation(
  498. const AData : Pointer;
  499. const ADataLength : Integer
  500. ) : PUCA_DataBook;overload;
  501. function LoadCollation(
  502. const AFileName : UnicodeString;
  503. var AAliases : TUnicodeStringArray
  504. ) : PUCA_DataBook;overload;
  505. function LoadCollation(
  506. const AFileName : UnicodeString
  507. ) : PUCA_DataBook;overload;
  508. function LoadCollation(
  509. const ADirectory,
  510. ALanguage : UnicodeString;
  511. var AAliases : TUnicodeStringArray
  512. ) : PUCA_DataBook;overload;
  513. function LoadCollation(
  514. const ADirectory,
  515. ALanguage : UnicodeString
  516. ) : PUCA_DataBook;overload;
  517. procedure FreeCollation(AItem : PUCA_DataBook);
  518. type
  519. TSetOfByte = set of Byte;
  520. function BytesToString(
  521. const ABytes : array of Byte;
  522. const AValideChars : TSetOfByte
  523. ) : UnicodeString;
  524. function BytesToName(
  525. const ABytes : array of Byte
  526. ) : UnicodeString;
  527. type
  528. TEndianKind = (Little, Big);
  529. const
  530. ENDIAN_SUFFIX : array[TEndianKind] of UnicodeString = ('le','be');
  531. {$IFDEF ENDIAN_LITTLE}
  532. ENDIAN_NATIVE = TEndianKind.Little;
  533. ENDIAN_NON_NATIVE = TEndianKind.Big;
  534. {$ENDIF ENDIAN_LITTLE}
  535. {$IFDEF ENDIAN_BIG}
  536. ENDIAN_NATIVE = TEndianKind.Big;
  537. ENDIAN_NON_NATIVE = TEndianKind.Little;
  538. {$ENDIF ENDIAN_BIG}
  539. resourcestring
  540. SCollationNotFound = 'Collation not found : "%s".';
  541. implementation
  542. uses
  543. unicodenumtable;
  544. type
  545. TCardinalRec = packed record
  546. {$ifdef ENDIAN_LITTLE}
  547. byte0, byte1, byte2, byte3 : Byte;
  548. {$else ENDIAN_LITTLE}
  549. byte3, byte2, byte1, byte0 : Byte;
  550. {$endif ENDIAN_LITTLE}
  551. end;
  552. TWordRec = packed record
  553. {$ifdef ENDIAN_LITTLE}
  554. byte0, byte1 : Byte;
  555. {$else ENDIAN_LITTLE}
  556. byte1, byte0 : Byte;
  557. {$endif ENDIAN_LITTLE}
  558. end;
  559. const
  560. BYTES_OF_VALID_NAME_CHARS : set of Byte = [
  561. Ord('a')..Ord('z'), Ord('A')..Ord('Z'), Ord('-'),Ord('_')
  562. ];
  563. function BytesToString(
  564. const ABytes : array of Byte;
  565. const AValideChars : TSetOfByte
  566. ) : UnicodeString;
  567. var
  568. c, i, rl : Integer;
  569. pr : PWord;
  570. begin
  571. rl := 0;
  572. c := Length(ABytes);
  573. if (c > 0) then begin
  574. for i := 0 to c-1 do begin
  575. if not(ABytes[i] in AValideChars) then
  576. break;
  577. rl := rl+1;
  578. end;
  579. end;
  580. SetLength(Result,rl);
  581. if (rl > 0) then begin
  582. pr := PWord(@Result[1]);
  583. for i := 0 to rl-1 do begin
  584. pr^ := ABytes[i];
  585. Inc(pr);
  586. end;
  587. end;
  588. end;
  589. function BytesToName(
  590. const ABytes : array of Byte
  591. ) : UnicodeString;
  592. begin
  593. Result := BytesToString(ABytes,BYTES_OF_VALID_NAME_CHARS);
  594. end;
  595. { TCollationTable }
  596. function TCollationTable.GetCapacity : Integer;
  597. begin
  598. Result := Length(FItems);
  599. end;
  600. function TCollationTable.GetCount : Integer;
  601. begin
  602. if (FCount < 0) or (Length(FItems) < 1) or (FCount > Length(FItems)) then
  603. FCount := 0;
  604. Result := FCount;
  605. end;
  606. function TCollationTable.GetItem(const AIndex : Integer) : PCollationTableItem;
  607. begin
  608. if (AIndex < 0) or (AIndex >= Count) then
  609. Error(reRangeError);
  610. Result := @FItems[AIndex];
  611. end;
  612. procedure TCollationTable.Grow();
  613. var
  614. c0, c1 : Integer;
  615. begin
  616. c0 := Length(FItems);
  617. if (c0 < 1) then begin
  618. c0 := 1;
  619. if (FCount < 0) then
  620. FCount := 0;
  621. end;
  622. c1 := 2*c0;
  623. c0 := Length(FItems);
  624. SetLength(FItems,c1);
  625. FillChar(FItems[c0],((c1-c0)*SizeOf(TCollationTableItem)),#0);
  626. end;
  627. procedure TCollationTable.ClearItem(AItem : PCollationTableItem);
  628. begin
  629. if (AItem = nil) then
  630. exit;
  631. AItem^.Collation := nil;
  632. SetLength(AItem^.Aliases,0);
  633. end;
  634. procedure TCollationTable.AddAlias(
  635. AItem : PCollationTableItem;
  636. AAlias : UnicodeString
  637. );
  638. var
  639. n : UnicodeString;
  640. c, i : Integer;
  641. begin
  642. n := NormalizeName(AAlias);
  643. if (n = '') then
  644. exit;
  645. c := Length(AItem^.Aliases);
  646. if (c > 0) then begin
  647. for i := 0 to c-1 do begin
  648. if (AItem^.Aliases[i] = n) then
  649. exit;
  650. end;
  651. end;
  652. SetLength(AItem^.Aliases,(c+1));
  653. AItem^.Aliases[c] := n;
  654. end;
  655. class function TCollationTable.NormalizeName(
  656. AName : UnicodeString
  657. ) : UnicodeString;
  658. var
  659. r : UnicodeString;
  660. c, i, rl : Integer;
  661. cx : Word;
  662. begin
  663. c := Length(AName);
  664. rl := 0;
  665. SetLength(r,c);
  666. for i := 1 to c do begin
  667. case Ord(AName[i]) of
  668. Ord('a')..Ord('z') : cx := Ord(AName[i]);
  669. Ord('A')..Ord('Z') : cx := Ord(AName[i])+(Ord('a')-Ord('A'));
  670. Ord('0')..Ord('9'),
  671. Ord('-'), Ord('_') : cx := Ord(AName[i]);
  672. else
  673. cx := 0;
  674. end;
  675. if (cx > 0) then begin
  676. rl := rl+1;
  677. r[rl] := UnicodeChar(cx);
  678. end;
  679. end;
  680. SetLength(r,rl);
  681. Result := r;
  682. end;
  683. procedure TCollationTable.Clear();
  684. var
  685. p : PCollationTableItem;
  686. i : Integer;
  687. begin
  688. if (Count < 1) then
  689. exit;
  690. p := @FItems[0];
  691. for i := 0 to Count-1 do begin;
  692. ClearItem(p);
  693. Inc(p);
  694. end;
  695. FCount := 0;
  696. end;
  697. function TCollationTable.IndexOf(AName : UnicodeString) : Integer;
  698. var
  699. c, i, k : Integer;
  700. p : PCollationTableItem;
  701. n : UnicodeString;
  702. begin
  703. c := Count;
  704. if (c > 0) then begin
  705. // Names
  706. n := NormalizeName(AName);
  707. p := @FItems[0];
  708. for i := 0 to c-1 do begin
  709. if (Length(p^.Aliases) > 0) and (p^.Aliases[0] = n) then
  710. exit(i);
  711. Inc(p);
  712. end;
  713. // Aliases
  714. p := @FItems[0];
  715. for i := 0 to c-1 do begin
  716. if (Length(p^.Aliases) > 1) then begin
  717. for k := 1 to Length(p^.Aliases)-1 do begin
  718. if (p^.Aliases[k] = n) then
  719. exit(i);
  720. end;
  721. end;
  722. Inc(p);
  723. end;
  724. end;
  725. Result := -1;
  726. end;
  727. function TCollationTable.IndexOf(ACollation : PUCA_DataBook) : Integer;
  728. var
  729. c, i : Integer;
  730. p : PCollationTableItem;
  731. begin
  732. c := Count;
  733. if (c > 0) then begin
  734. p := @FItems[0];
  735. for i := 0 to c-1 do begin
  736. if (p^.Collation = ACollation) then
  737. exit(i);
  738. Inc(p);
  739. end;
  740. end;
  741. Result := -1;
  742. end;
  743. function TCollationTable.Find(AName : UnicodeString) : PCollationTableItem;
  744. var
  745. i : Integer;
  746. begin
  747. i := IndexOf(AName);
  748. if (i >= 0) then
  749. Result := @FItems[i]
  750. else
  751. Result := nil;
  752. end;
  753. function TCollationTable.Find(ACollation : PUCA_DataBook) : PCollationTableItem;
  754. var
  755. i : Integer;
  756. begin
  757. i := IndexOf(ACollation);
  758. if (i >= 0) then
  759. Result := @FItems[i]
  760. else
  761. Result := nil;
  762. end;
  763. function TCollationTable.Add(ACollation : PUCA_DataBook) : Integer;
  764. var
  765. c : Integer;
  766. p : PCollationTableItem;
  767. begin
  768. Result := IndexOf(ACollation);
  769. if (Result < 0) then begin
  770. c := Count;
  771. if (c >= Capacity) then
  772. Grow();
  773. p := @FItems[c];
  774. p^.Collation := ACollation;
  775. SetLength(p^.Aliases,1);
  776. p^.Aliases[0] := NormalizeName(BytesToName(ACollation^.CollationName));
  777. FCount := FCount+1;
  778. Result := c;
  779. end;
  780. end;
  781. function TCollationTable.AddAlias(AName, AAlias : UnicodeString) : Boolean;
  782. var
  783. p : PCollationTableItem;
  784. begin
  785. p := Find(AName);
  786. Result := (p <> nil);
  787. if Result then
  788. AddAlias(p,AAlias);
  789. end;
  790. function TCollationTable.Remove(AIndex : Integer) : PUCA_DataBook;
  791. var
  792. p, q : PCollationTableItem;
  793. c, i : Integer;
  794. begin
  795. if (AIndex < 0) or (AIndex >= Count) then
  796. Error(reRangeError);
  797. p := @FItems[AIndex];
  798. Result := p^.Collation;
  799. ClearItem(p);
  800. c := Count;
  801. if (AIndex < (c-1)) then begin
  802. for i := AIndex+1 to c-1 do begin
  803. q := p;
  804. Inc(p);
  805. Move(p^,q^,SizeOf(TCollationTableItem));
  806. end;
  807. FillChar(p^,SizeOf(TCollationTableItem),#0);
  808. end;
  809. FCount := FCount-1;
  810. end;
  811. { TUInt24Rec }
  812. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  813. begin
  814. TCardinalRec(Result).byte0 := a.byte0;
  815. TCardinalRec(Result).byte1 := a.byte1;
  816. TCardinalRec(Result).byte2 := a.byte2;
  817. TCardinalRec(Result).byte3 := 0;
  818. end;
  819. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  820. begin
  821. Result := Cardinal(a);
  822. end;
  823. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  824. begin
  825. {$IFOPT R+}
  826. if (a > $FFFF) then
  827. Error(reIntOverflow);
  828. {$ENDIF R+}
  829. TWordRec(Result).byte0 := a.byte0;
  830. TWordRec(Result).byte1 := a.byte1;
  831. end;
  832. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  833. begin
  834. {$IFOPT R+}
  835. if (a > $FF) then
  836. Error(reIntOverflow);
  837. {$ENDIF R+}
  838. Result := a.byte0;
  839. end;
  840. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  841. begin
  842. {$IFOPT R+}
  843. if (a > $FFFFFF) then
  844. Error(reIntOverflow);
  845. {$ENDIF R+}
  846. Result.byte0 := TCardinalRec(a).byte0;
  847. Result.byte1 := TCardinalRec(a).byte1;
  848. Result.byte2 := TCardinalRec(a).byte2;
  849. end;
  850. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  851. begin
  852. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  853. end;
  854. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  855. begin
  856. Result := (TCardinalRec(b).byte3 = 0) and
  857. (a.byte0 = TCardinalRec(b).byte0) and
  858. (a.byte1 = TCardinalRec(b).byte1) and
  859. (a.byte2 = TCardinalRec(b).byte2);
  860. end;
  861. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  862. begin
  863. Result := (b = a);
  864. end;
  865. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  866. begin
  867. Result := (LongInt(a) = b);
  868. end;
  869. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  870. begin
  871. Result := (b = a);
  872. end;
  873. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  874. begin
  875. Result := (a.byte2 = 0) and
  876. (a.byte0 = TWordRec(b).byte0) and
  877. (a.byte1 = TWordRec(b).byte1);
  878. end;
  879. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  880. begin
  881. Result := (b = a);
  882. end;
  883. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  884. begin
  885. Result := (a.byte2 = 0) and
  886. (a.byte1 = 0) and
  887. (a.byte0 = b);
  888. end;
  889. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  890. begin
  891. Result := (b = a);
  892. end;
  893. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  894. begin
  895. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  896. end;
  897. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  898. begin
  899. Result := (TCardinalRec(b).byte3 <> 0) or
  900. (a.byte0 <> TCardinalRec(b).byte0) or
  901. (a.byte1 <> TCardinalRec(b).byte1) or
  902. (a.byte2 <> TCardinalRec(b).byte2);
  903. end;
  904. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  905. begin
  906. Result := (b <> a);
  907. end;
  908. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  909. begin
  910. Result := (a.byte2 > b.byte2) or
  911. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  912. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  913. end;
  914. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  915. begin
  916. Result := Cardinal(a) > b;
  917. end;
  918. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  919. begin
  920. Result := a > Cardinal(b);
  921. end;
  922. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  923. begin
  924. Result := (a.byte2 > b.byte2) or
  925. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  926. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  927. end;
  928. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  929. begin
  930. Result := Cardinal(a) >= b;
  931. end;
  932. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  933. begin
  934. Result := a >= Cardinal(b);
  935. end;
  936. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  937. begin
  938. Result := (b > a);
  939. end;
  940. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  941. begin
  942. Result := Cardinal(a) < b;
  943. end;
  944. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  945. begin
  946. Result := a < Cardinal(b);
  947. end;
  948. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  949. begin
  950. Result := (b >= a);
  951. end;
  952. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  953. begin
  954. Result := Cardinal(a) <= b;
  955. end;
  956. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  957. begin
  958. Result := a <= Cardinal(b);
  959. end;
  960. type
  961. TBitOrder = 0..7;
  962. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  963. begin
  964. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  965. end;
  966. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  967. begin
  968. if AValue then
  969. AData := AData or (1 shl (ABit mod 8))
  970. else
  971. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  972. end;
  973. {$IFNDEF HAS_COMPARE_BYTE}
  974. function CompareByte(const A, B; ALength : SizeInt):SizeInt;
  975. var
  976. pa, pb : PByte;
  977. i : Integer;
  978. begin
  979. if (ALength < 1) then
  980. exit(0);
  981. pa := PByte(@A);
  982. pb := PByte(@B);
  983. if (pa = pb) then
  984. exit(0);
  985. for i := 1 to ALength do begin
  986. if (pa^ <> pb^) then
  987. exit(i);
  988. pa := pa+1;
  989. pb := pb+1;
  990. end;
  991. Result := 0;
  992. end;
  993. {$ENDIF HAS_COMPARE_BYTE}
  994. function IndexInArrayDWord(const ABuffer : array of DWord; AItem : DWord) : SizeInt;
  995. var
  996. c, i : Integer;
  997. p : PDWord;
  998. begin
  999. Result := -1;
  1000. c := Length(ABuffer);
  1001. if (c < 1) then
  1002. exit;
  1003. p := @ABuffer[Low(ABuffer)];
  1004. for i := 1 to c do begin
  1005. if (p^ = AItem) then begin
  1006. Result := i-1;
  1007. break;
  1008. end;
  1009. p := p+1;
  1010. end;
  1011. end;
  1012. var
  1013. CollationTable : TCollationTable;
  1014. function IndexOfCollation(AName : UnicodeString) : Integer;
  1015. begin
  1016. Result := CollationTable.IndexOf(AName);
  1017. end;
  1018. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  1019. begin
  1020. Result := RegisterCollation(ACollation,[]);
  1021. end;
  1022. function RegisterCollation(
  1023. const ACollation : PUCA_DataBook;
  1024. const AAliasList : array of UnicodeString
  1025. ) : Boolean;
  1026. var
  1027. i : Integer;
  1028. p : PCollationTableItem;
  1029. begin
  1030. Result := (CollationTable.IndexOf(BytesToName(ACollation^.CollationName)) = -1);
  1031. if Result then begin
  1032. i := CollationTable.Add(ACollation);
  1033. if (Length(AAliasList) > 0) then begin
  1034. p := CollationTable[i];
  1035. for i := Low(AAliasList) to High(AAliasList) do
  1036. CollationTable.AddAlias(p,AAliasList[i]);
  1037. end;
  1038. end;
  1039. end;
  1040. function RegisterCollation(ADirectory, ALanguage : UnicodeString) : Boolean;
  1041. var
  1042. cl : PUCA_DataBook;
  1043. al : TUnicodeStringArray;
  1044. begin
  1045. al := nil;
  1046. cl := LoadCollation(ADirectory,ALanguage,al);
  1047. if (cl = nil) then
  1048. exit(False);
  1049. try
  1050. Result := RegisterCollation(cl,al);
  1051. except
  1052. FreeCollation(cl);
  1053. raise;
  1054. end;
  1055. if not Result then
  1056. FreeCollation(cl);
  1057. end;
  1058. function AddAliasCollation(
  1059. ACollation : PUCA_DataBook;
  1060. AALias : UnicodeString
  1061. ) : Boolean;
  1062. var
  1063. p : PCollationTableItem;
  1064. begin
  1065. Result := False;
  1066. if (ACollation <> nil) then begin
  1067. p := CollationTable.Find(ACollation);
  1068. if (p <> nil) then begin
  1069. CollationTable.AddAlias(p,AALias);
  1070. Result := True;
  1071. end;
  1072. end;
  1073. end;
  1074. function UnregisterCollation(AName : UnicodeString): Boolean;
  1075. var
  1076. i : Integer;
  1077. begin
  1078. i := CollationTable.IndexOf(AName);
  1079. Result := (i >= 0);
  1080. if Result then
  1081. CollationTable.Remove(i);
  1082. end;
  1083. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  1084. var
  1085. i : Integer;
  1086. p : PCollationTableItem;
  1087. begin
  1088. if AFreeDynamicCollations then begin
  1089. for i := 0 to CollationTable.Count-1 do begin
  1090. p := CollationTable[i];
  1091. if p^.Collation.Dynamic then begin
  1092. FreeCollation(p^.Collation);
  1093. p^.Collation := nil;
  1094. end;
  1095. end;
  1096. end;
  1097. CollationTable.Clear();
  1098. end;
  1099. function FindCollation(AName : UnicodeString): PUCA_DataBook;overload;
  1100. var
  1101. p : PCollationTableItem;
  1102. begin
  1103. p := CollationTable.Find(AName);
  1104. if (p <> nil) then
  1105. Result := p^.Collation
  1106. else
  1107. Result := nil;
  1108. end;
  1109. function GetCollationCount() : Integer;
  1110. begin
  1111. Result := CollationTable.Count;
  1112. end;
  1113. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  1114. var
  1115. p : PCollationTableItem;
  1116. begin
  1117. p := CollationTable[AIndex];
  1118. if (p <> nil) then
  1119. Result := p^.Collation
  1120. else
  1121. Result := nil;
  1122. end;
  1123. procedure PrepareCollation(
  1124. ACollation : PUCA_DataBook;
  1125. const ABaseName : UnicodeString;
  1126. const AChangedFields : TCollationFields
  1127. );
  1128. var
  1129. s : UnicodeString;
  1130. p, base : PUCA_DataBook;
  1131. begin
  1132. if (ABaseName <> '') then
  1133. s := ABaseName
  1134. else
  1135. s := ROOT_COLLATION_NAME;
  1136. p := ACollation;
  1137. base := FindCollation(s);
  1138. if (base = nil) then
  1139. Error(reCodesetConversion);
  1140. p^.Base := base;
  1141. if not(TCollationField.BackWard in AChangedFields) then
  1142. p^.Backwards := base^.Backwards;
  1143. if not(TCollationField.VariableLowLimit in AChangedFields) then
  1144. p^.VariableLowLimit := base^.VariableLowLimit;
  1145. if not(TCollationField.VariableHighLimit in AChangedFields) then
  1146. p^.VariableLowLimit := base^.VariableHighLimit;
  1147. if not(TCollationField.Alternate in AChangedFields) then
  1148. p^.VariableWeight := base^.VariableWeight;
  1149. if not(TCollationField.Normalization in AChangedFields) then
  1150. p^.NoNormalization := base^.NoNormalization;
  1151. if not(TCollationField.Strength in AChangedFields) then
  1152. p^.ComparisonStrength := base^.ComparisonStrength;
  1153. end;
  1154. type
  1155. TSerializedCollationHeader = packed record
  1156. Base : TCollationName;
  1157. Version : TCollationVersion;
  1158. CollationName : TCollationName;
  1159. CollationAliases : TCollationName; // ";" separated
  1160. VariableWeight : Byte;
  1161. Backwards : Byte;
  1162. BMP_Table1Length : DWord;
  1163. BMP_Table2Length : DWord;
  1164. OBMP_Table1Length : DWord;
  1165. OBMP_Table2Length : DWord;
  1166. PropCount : DWord;
  1167. VariableLowLimit : Word;
  1168. VariableHighLimit : Word;
  1169. NoNormalization : Byte;
  1170. Strength : Byte;
  1171. ChangedFields : Byte;
  1172. end;
  1173. PSerializedCollationHeader = ^TSerializedCollationHeader;
  1174. procedure FreeCollation(AItem : PUCA_DataBook);
  1175. var
  1176. h : PSerializedCollationHeader;
  1177. begin
  1178. if (AItem = nil) or not(AItem^.Dynamic) then
  1179. exit;
  1180. h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
  1181. if (AItem^.BMP_Table1 <> nil) then
  1182. FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
  1183. if (AItem^.BMP_Table2 <> nil) then
  1184. FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
  1185. if (AItem^.OBMP_Table1 <> nil) then
  1186. FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
  1187. if (AItem^.OBMP_Table2 <> nil) then
  1188. FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
  1189. if (AItem^.Props <> nil) then
  1190. FreeMem(AItem^.Props,h^.PropCount);
  1191. FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  1192. end;
  1193. function ParseAliases(AStr : UnicodeString) : TUnicodeStringArray;
  1194. var
  1195. r : TUnicodeStringArray;
  1196. c, k, i : Integer;
  1197. s : UnicodeString;
  1198. begin
  1199. SetLength(r,0);
  1200. c := Length(AStr);
  1201. k := 1;
  1202. for i := 1 to c do begin
  1203. if (AStr[i] <> ';') then begin
  1204. k := i;
  1205. break;
  1206. end;
  1207. end;
  1208. s := '';
  1209. for i := 1 to c do begin
  1210. if (AStr[i] = ';') then begin
  1211. s := Copy(AStr,k,(i-k));
  1212. end else if (i = c) then begin
  1213. s := Copy(AStr,k,(i+1-k));
  1214. end;
  1215. if (s <> '') then begin
  1216. SetLength(r,(Length(r)+1));
  1217. r[High(r)] := s;
  1218. s := '';
  1219. k := i+1;
  1220. end;
  1221. end;
  1222. Result := r;
  1223. end;
  1224. function LoadCollation(
  1225. const AData : Pointer;
  1226. const ADataLength : Integer;
  1227. var AAliases : TUnicodeStringArray
  1228. ) : PUCA_DataBook;
  1229. var
  1230. dataPointer : PByte;
  1231. readedLength : LongInt;
  1232. function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
  1233. begin
  1234. Result := (readedLength + ALength) <= ADataLength;
  1235. if not result then
  1236. exit;
  1237. Move(dataPointer^,ADest^,ALength);
  1238. Inc(dataPointer,ALength);
  1239. readedLength := readedLength + ALength;
  1240. end;
  1241. var
  1242. r : PUCA_DataBook;
  1243. h : PSerializedCollationHeader;
  1244. cfs : TCollationFields;
  1245. i : Integer;
  1246. baseName, s : UnicodeString;
  1247. begin
  1248. Result := nil;
  1249. readedLength := 0;
  1250. AAliases := nil;
  1251. dataPointer := AData;
  1252. r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  1253. try
  1254. h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
  1255. if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
  1256. exit;
  1257. r^.Version := h^.Version;
  1258. r^.CollationName := h^.CollationName;
  1259. r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
  1260. r^.Backwards[0] := IsBitON(h^.Backwards,0);
  1261. r^.Backwards[1] := IsBitON(h^.Backwards,1);
  1262. r^.Backwards[2] := IsBitON(h^.Backwards,2);
  1263. r^.Backwards[3] := IsBitON(h^.Backwards,3);
  1264. if (h^.BMP_Table1Length > 0) then begin
  1265. r^.BMP_Table1 := GetMemory(h^.BMP_Table1Length);
  1266. if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
  1267. exit;
  1268. end;
  1269. if (h^.BMP_Table2Length > 0) then begin
  1270. r^.BMP_Table2 := GetMemory(h^.BMP_Table2Length);
  1271. if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
  1272. exit;
  1273. end;
  1274. if (h^.OBMP_Table1Length > 0) then begin
  1275. r^.OBMP_Table1 := GetMemory(h^.OBMP_Table1Length);
  1276. if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
  1277. exit;
  1278. end;
  1279. if (h^.OBMP_Table2Length > 0) then begin
  1280. r^.OBMP_Table2 := GetMemory(h^.OBMP_Table2Length);
  1281. if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
  1282. exit;
  1283. end;
  1284. r^.PropCount := h^.PropCount;
  1285. if (h^.PropCount > 0) then begin
  1286. r^.Props := GetMemory(h^.PropCount);
  1287. if not ReadBuffer(r^.Props,h^.PropCount) then
  1288. exit;
  1289. end;
  1290. r^.VariableLowLimit := h^.VariableLowLimit;
  1291. r^.VariableHighLimit := h^.VariableHighLimit;
  1292. r^.NoNormalization := (h^.NoNormalization <> 0);
  1293. r^.ComparisonStrength := h^.Strength;
  1294. cfs := [];
  1295. for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
  1296. if IsBitON(h^.ChangedFields,i) then
  1297. cfs := cfs + [TCollationField(i)];
  1298. end;
  1299. baseName := BytesToName(h^.Base);
  1300. if (baseName = '') then begin
  1301. if (BytesToName(h^.CollationName) <> ROOT_COLLATION_NAME) then
  1302. baseName := ROOT_COLLATION_NAME
  1303. else
  1304. baseName := '';
  1305. end;
  1306. if (baseName <> '') then
  1307. PrepareCollation(r,baseName,cfs);
  1308. s := BytesToString(h^.CollationAliases,(BYTES_OF_VALID_NAME_CHARS+[Ord(';')]));
  1309. if (s <> '') then
  1310. AAliases := ParseAliases(s);
  1311. r^.Dynamic := True;
  1312. Result := r;
  1313. except
  1314. FreeCollation(r);
  1315. raise;
  1316. end;
  1317. end;
  1318. function LoadCollation(
  1319. const AData : Pointer;
  1320. const ADataLength : Integer
  1321. ) : PUCA_DataBook;
  1322. var
  1323. al : TUnicodeStringArray;
  1324. begin
  1325. al := nil;
  1326. Result := LoadCollation(AData,ADataLength,al);
  1327. end;
  1328. {$IFDEF HAS_PUSH}
  1329. {$PUSH}
  1330. {$ENDIF HAS_PUSH}
  1331. {$IFNDEF HAS_PUSH}
  1332. {$IFOPT I+}
  1333. {$DEFINE I_PLUS}
  1334. {$ELSE}
  1335. {$UNDEF I_PLUS}
  1336. {$ENDIF}
  1337. {$ENDIF HAS_PUSH}
  1338. function LoadCollation(
  1339. const AFileName : UnicodeString;
  1340. var AAliases : TUnicodeStringArray
  1341. ) : PUCA_DataBook;
  1342. const
  1343. BLOCK_SIZE = 16*1024;
  1344. var
  1345. f : File of Byte;
  1346. locSize, locReaded, c : LongInt;
  1347. locBuffer : PByte;
  1348. locBlockSize : LongInt;
  1349. begin
  1350. Result := nil;
  1351. {$I-}
  1352. if (AFileName = '') then
  1353. exit;
  1354. Assign(f,AFileName);
  1355. Reset(f);
  1356. try
  1357. if (IOResult <> 0) then
  1358. exit;
  1359. locSize := FileSize(f);
  1360. if (locSize < SizeOf(TSerializedCollationHeader)) then
  1361. exit;
  1362. locBuffer := GetMemory(locSize);
  1363. try
  1364. locBlockSize := BLOCK_SIZE;
  1365. locReaded := 0;
  1366. while (locReaded < locSize) do begin
  1367. if (locBlockSize > (locSize-locReaded)) then
  1368. locBlockSize := locSize-locReaded;
  1369. BlockRead(f,locBuffer[locReaded],locBlockSize,c);
  1370. if (IOResult <> 0) or (c <= 0) then
  1371. exit;
  1372. locReaded := locReaded + c;
  1373. end;
  1374. Result := LoadCollation(locBuffer,locSize,AAliases);
  1375. finally
  1376. FreeMemory(locBuffer);
  1377. end;
  1378. finally
  1379. Close(f);
  1380. end;
  1381. end;
  1382. function LoadCollation(
  1383. const AFileName : UnicodeString
  1384. ) : PUCA_DataBook;
  1385. var
  1386. al : TUnicodeStringArray;
  1387. begin
  1388. al := nil;
  1389. Result := LoadCollation(AFileName,al);
  1390. end;
  1391. {$IFDEF HAS_PUSH}
  1392. {$POP}
  1393. {$ELSE}
  1394. {$IFDEF I_PLUS}
  1395. {$I+}
  1396. {$ELSE}
  1397. {$I-}
  1398. {$ENDIF}
  1399. {$ENDIF HAS_PUSH}
  1400. function LoadCollation(
  1401. const ADirectory,
  1402. ALanguage : UnicodeString;
  1403. var AAliases : TUnicodeStringArray
  1404. ) : PUCA_DataBook;
  1405. var
  1406. fileName : UnicodeString;
  1407. begin
  1408. fileName := ADirectory;
  1409. if (fileName <> '') then begin
  1410. if (fileName[Length(fileName)] <> DirectorySeparator) then
  1411. fileName := fileName + DirectorySeparator;
  1412. end;
  1413. fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
  1414. Result := LoadCollation(fileName,AAliases);
  1415. end;
  1416. function LoadCollation(
  1417. const ADirectory,
  1418. ALanguage : UnicodeString
  1419. ) : PUCA_DataBook;
  1420. var
  1421. al : TUnicodeStringArray;
  1422. begin
  1423. al := nil;
  1424. Result := LoadCollation(ADirectory,ALanguage,al);
  1425. end;
  1426. {$INCLUDE unicodedata.inc}
  1427. {$IFDEF ENDIAN_LITTLE}
  1428. {$INCLUDE unicodedata_le.inc}
  1429. {$ENDIF ENDIAN_LITTLE}
  1430. {$IFDEF ENDIAN_BIG}
  1431. {$INCLUDE unicodedata_be.inc}
  1432. {$ENDIF ENDIAN_BIG}
  1433. procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
  1434. begin
  1435. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  1436. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  1437. end;
  1438. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  1439. begin
  1440. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  1441. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  1442. end;
  1443. function UnicodeIsSurrogatePair(
  1444. const AHighSurrogate,
  1445. ALowSurrogate : UnicodeChar
  1446. ) : Boolean;
  1447. begin
  1448. Result :=
  1449. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  1450. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  1451. ) and
  1452. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  1453. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  1454. )
  1455. end;
  1456. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  1457. begin
  1458. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  1459. (Word(AValue) <= HIGH_SURROGATE_END);
  1460. end;
  1461. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  1462. begin
  1463. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  1464. (Word(AValue) <= LOW_SURROGATE_END);
  1465. end;
  1466. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  1467. begin
  1468. Result:=
  1469. @UC_PROP_ARRAY[
  1470. UC_TABLE_3[
  1471. UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
  1472. [lo(ACodePoint) shr 4]
  1473. ][lo(ACodePoint) and $F]
  1474. ]; {
  1475. @UC_PROP_ARRAY[
  1476. UC_TABLE_2[
  1477. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  1478. WordRec(ACodePoint).Lo
  1479. ]
  1480. ];}
  1481. end;
  1482. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  1483. begin
  1484. Result:=
  1485. @UC_PROP_ARRAY[
  1486. UCO_TABLE_3[
  1487. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  1488. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  1489. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  1490. ]; {
  1491. Result:=
  1492. @UC_PROP_ARRAY[
  1493. UCO_TABLE_2[
  1494. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1495. Word(ALowS) - LOW_SURROGATE_BEGIN
  1496. ]
  1497. ]; }
  1498. end;
  1499. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  1500. var
  1501. l, h : UnicodeChar;
  1502. begin
  1503. if (ACodePoint <= High(Word)) then
  1504. exit(GetProps(Word(ACodePoint)));
  1505. FromUCS4(ACodePoint,h,l);
  1506. Result := GetProps(h,l);
  1507. end;
  1508. function UnicodeToUpper(
  1509. const AString : UnicodeString;
  1510. const AIgnoreInvalidSequence : Boolean;
  1511. out AResultString : UnicodeString
  1512. ) : Integer;
  1513. var
  1514. i, c : SizeInt;
  1515. pp, pr : PUnicodeChar;
  1516. pu : PUC_Prop;
  1517. locIsSurrogate : Boolean;
  1518. r : UnicodeString;
  1519. begin
  1520. c := Length(AString);
  1521. SetLength(r,2*c);
  1522. if (c > 0) then begin
  1523. pp := @AString[1];
  1524. pr := @r[1];
  1525. i := 1;
  1526. while (i <= c) do begin
  1527. pu := GetProps(Word(pp^));
  1528. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1529. if locIsSurrogate then begin
  1530. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  1531. if AIgnoreInvalidSequence then begin
  1532. pr^ := pp^;
  1533. Inc(pp);
  1534. Inc(pr);
  1535. Inc(i);
  1536. Continue;
  1537. end;
  1538. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  1539. end;
  1540. pu := GetProps(pp^,AString[i+1]);
  1541. end;
  1542. if (pu^.SimpleUpperCase = 0) then begin
  1543. pr^ := pp^;
  1544. if locIsSurrogate then begin
  1545. Inc(pp);
  1546. Inc(pr);
  1547. Inc(i);
  1548. pr^ := pp^;
  1549. end;
  1550. end else begin
  1551. if (pu^.SimpleUpperCase <= $FFFF) then begin
  1552. pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
  1553. end else begin
  1554. FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  1555. Inc(pr);
  1556. end;
  1557. if locIsSurrogate then begin
  1558. Inc(pp);
  1559. Inc(i);
  1560. end;
  1561. end;
  1562. Inc(pp);
  1563. Inc(pr);
  1564. Inc(i);
  1565. end;
  1566. Dec(pp);
  1567. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1568. SetLength(r,i);
  1569. AResultString := r;
  1570. end;
  1571. Result := 0;
  1572. end;
  1573. function UnicodeToLower(
  1574. const AString : UnicodeString;
  1575. const AIgnoreInvalidSequence : Boolean;
  1576. out AResultString : UnicodeString
  1577. ) : Integer;
  1578. var
  1579. i, c : SizeInt;
  1580. pp, pr : PUnicodeChar;
  1581. pu : PUC_Prop;
  1582. locIsSurrogate : Boolean;
  1583. r : UnicodeString;
  1584. begin
  1585. c := Length(AString);
  1586. SetLength(r,2*c);
  1587. if (c > 0) then begin
  1588. pp := @AString[1];
  1589. pr := @r[1];
  1590. i := 1;
  1591. while (i <= c) do begin
  1592. pu := GetProps(Word(pp^));
  1593. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1594. if locIsSurrogate then begin
  1595. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  1596. if AIgnoreInvalidSequence then begin
  1597. pr^ := pp^;
  1598. Inc(pp);
  1599. Inc(pr);
  1600. Inc(i);
  1601. Continue;
  1602. end;
  1603. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  1604. end;
  1605. pu := GetProps(pp^,AString[i+1]);
  1606. end;
  1607. if (pu^.SimpleLowerCase = 0) then begin
  1608. pr^ := pp^;
  1609. if locIsSurrogate then begin
  1610. Inc(pp);
  1611. Inc(pr);
  1612. Inc(i);
  1613. pr^ := pp^;
  1614. end;
  1615. end else begin
  1616. if (pu^.SimpleLowerCase <= $FFFF) then begin
  1617. pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
  1618. end else begin
  1619. FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  1620. Inc(pr);
  1621. end;
  1622. if locIsSurrogate then begin
  1623. Inc(pp);
  1624. Inc(i);
  1625. end;
  1626. end;
  1627. Inc(pp);
  1628. Inc(pr);
  1629. Inc(i);
  1630. end;
  1631. Dec(pp);
  1632. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1633. SetLength(r,i);
  1634. AResultString := r;
  1635. end;
  1636. Result := 0;
  1637. end;
  1638. //----------------------------------------------------------------------
  1639. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  1640. const
  1641. SBase = $AC00;
  1642. LBase = $1100;
  1643. VBase = $1161;
  1644. TBase = $11A7;
  1645. LCount = 19;
  1646. VCount = 21;
  1647. TCount = 28;
  1648. NCount = VCount * TCount; // 588
  1649. SCount = LCount * NCount; // 11172
  1650. var
  1651. SIndex, L, V, T : Integer;
  1652. begin
  1653. SIndex := AChar - SBase;
  1654. if (SIndex < 0) or (SIndex >= SCount) then begin
  1655. ABuffer^ := AChar;
  1656. exit(1);
  1657. end;
  1658. L := LBase + SIndex div NCount;
  1659. V := VBase + (SIndex mod NCount) div TCount;
  1660. T := TBase + SIndex mod TCount;
  1661. ABuffer[0] := L;
  1662. ABuffer[1] := V;
  1663. Result := 2;
  1664. if (T <> TBase) then begin
  1665. ABuffer[2] := T;
  1666. Inc(Result);
  1667. end;
  1668. end;
  1669. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  1670. var
  1671. locStack : array[0..23] of Cardinal;
  1672. locStackIdx : Integer;
  1673. ResultBuffer : array[0..23] of Cardinal;
  1674. ResultIdx : Integer;
  1675. procedure AddCompositionToStack(const AIndex : Integer);
  1676. var
  1677. pdecIdx : ^TDecompositionIndexRec;
  1678. k, kc : Integer;
  1679. pu : ^UInt24;
  1680. begin
  1681. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  1682. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  1683. kc := pdecIdx^.Length;
  1684. Inc(pu,kc);
  1685. for k := 1 to kc do begin
  1686. Dec(pu);
  1687. locStack[locStackIdx + k] := pu^;
  1688. end;
  1689. locStackIdx := locStackIdx + kc;
  1690. end;
  1691. procedure AddResult(const AChar : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  1692. begin
  1693. Inc(ResultIdx);
  1694. ResultBuffer[ResultIdx] := AChar;
  1695. end;
  1696. function PopStack() : Cardinal;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  1697. begin
  1698. Result := locStack[locStackIdx];
  1699. Dec(locStackIdx);
  1700. end;
  1701. var
  1702. cu : Cardinal;
  1703. decIdx : SmallInt;
  1704. locIsWord : Boolean;
  1705. i : Integer;
  1706. p : PUnicodeChar;
  1707. begin
  1708. ResultIdx := -1;
  1709. locStackIdx := -1;
  1710. AddCompositionToStack(ADecomposeIndex);
  1711. while (locStackIdx >= 0) do begin
  1712. cu := PopStack();
  1713. locIsWord := (cu <= MAX_WORD);
  1714. if locIsWord then
  1715. decIdx := GetProps(Word(cu))^.DecompositionID
  1716. else
  1717. decIdx := GetProps(cu)^.DecompositionID;
  1718. if (decIdx = -1) then
  1719. AddResult(cu)
  1720. else
  1721. AddCompositionToStack(decIdx);
  1722. end;
  1723. p := ABuffer;
  1724. Result := 0;
  1725. for i := 0 to ResultIdx do begin
  1726. cu := ResultBuffer[i];
  1727. if (cu <= MAX_WORD) then begin
  1728. p[0] := UnicodeChar(Word(cu));
  1729. Inc(p);
  1730. end else begin
  1731. FromUCS4(cu,p[0],p[1]);
  1732. Inc(p,2);
  1733. Inc(Result);
  1734. end;
  1735. end;
  1736. Result := Result + ResultIdx + 1;
  1737. end;
  1738. procedure CanonicalOrder(var AString : UnicodeString);
  1739. begin
  1740. CanonicalOrder(@AString[1],Length(AString));
  1741. end;
  1742. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  1743. var
  1744. i, c : SizeInt;
  1745. p, q : PUnicodeChar;
  1746. locIsSurrogateP, locIsSurrogateQ : Boolean;
  1747. procedure Swap();
  1748. var
  1749. t, t1 : UnicodeChar;
  1750. begin
  1751. if not locIsSurrogateP then begin
  1752. if not locIsSurrogateQ then begin
  1753. t := p^;
  1754. p^ := q^;
  1755. q^ := t;
  1756. exit;
  1757. end;
  1758. t := p^;
  1759. p[0] := q[0];
  1760. p[1] := q[1];
  1761. q[1] := t;
  1762. exit;
  1763. end;
  1764. if not locIsSurrogateQ then begin
  1765. t := q[0];
  1766. p[2] := p[1];
  1767. p[1] := p[0];
  1768. p[0] := t;
  1769. exit;
  1770. end;
  1771. t := p[0];
  1772. t1 := p[1];
  1773. p[0] := q[0];
  1774. p[1] := q[1];
  1775. q[0] := t;
  1776. q[1] := t1;
  1777. end;
  1778. var
  1779. pu : PUC_Prop;
  1780. cccp, cccq : Byte;
  1781. begin
  1782. c := ALength;
  1783. if (c < 2) then
  1784. exit;
  1785. p := AStr;
  1786. i := 1;
  1787. while (i < c) do begin
  1788. pu := GetProps(Word(p^));
  1789. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  1790. if locIsSurrogateP then begin
  1791. if (i = (c - 1)) then
  1792. Break;
  1793. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  1794. Inc(p);
  1795. Inc(i);
  1796. Continue;
  1797. end;
  1798. pu := GetProps(p[0],p[1]);
  1799. end;
  1800. if (pu^.CCC > 0) then begin
  1801. cccp := pu^.CCC;
  1802. if locIsSurrogateP then
  1803. q := p + 2
  1804. else
  1805. q := p + 1;
  1806. pu := GetProps(Word(q^));
  1807. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  1808. if locIsSurrogateQ then begin
  1809. if (i = c) then
  1810. Break;
  1811. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  1812. Inc(p);
  1813. Inc(i);
  1814. Continue;
  1815. end;
  1816. pu := GetProps(q[0],q[1]);
  1817. end;
  1818. cccq := pu^.CCC;
  1819. if (cccq > 0) and (cccp > cccq) then begin
  1820. Swap();
  1821. if (i > 1) then begin
  1822. Dec(p);
  1823. Dec(i);
  1824. pu := GetProps(Word(p^));
  1825. if (pu^.Category = UGC_Surrogate) then begin
  1826. if (i > 1) then begin
  1827. Dec(p);
  1828. Dec(i);
  1829. end;
  1830. end;
  1831. Continue;
  1832. end;
  1833. end;
  1834. end;
  1835. if locIsSurrogateP then begin
  1836. Inc(p);
  1837. Inc(i);
  1838. end;
  1839. Inc(p);
  1840. Inc(i);
  1841. end;
  1842. end;
  1843. //Canonical Decomposition
  1844. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  1845. begin
  1846. Result := NormalizeNFD(@AString[1],Length(AString));
  1847. end;
  1848. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  1849. const MAX_EXPAND = 3;
  1850. var
  1851. i, c, kc, k : SizeInt;
  1852. pp, pr : PUnicodeChar;
  1853. pu : PUC_Prop;
  1854. locIsSurrogate : Boolean;
  1855. cpArray : array[0..7] of Cardinal;
  1856. cp : Cardinal;
  1857. begin
  1858. c := ALength;
  1859. SetLength(Result,(MAX_EXPAND*c));
  1860. if (c > 0) then begin
  1861. pp := AStr;
  1862. pr := @Result[1];
  1863. i := 1;
  1864. while (i <= c) do begin
  1865. pu := GetProps(Word(pp^));
  1866. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1867. if locIsSurrogate then begin
  1868. if (i = c) then
  1869. Break;
  1870. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  1871. pr^ := pp^;
  1872. Inc(pp);
  1873. Inc(pr);
  1874. Inc(i);
  1875. Continue;
  1876. end;
  1877. pu := GetProps(pp[0],pp[1]);
  1878. end;
  1879. if pu^.HangulSyllable then begin
  1880. if locIsSurrogate then begin
  1881. cp := ToUCS4(pp[0],pp[1]);
  1882. Inc(pp);
  1883. Inc(i);
  1884. end else begin
  1885. cp := Word(pp^);
  1886. end;
  1887. kc := DecomposeHangul(cp,@cpArray[0]);
  1888. for k := 0 to kc - 1 do begin
  1889. if (cpArray[k] <= MAX_WORD) then begin
  1890. pr^ := UnicodeChar(Word(cpArray[k]));
  1891. pr := pr + 1;
  1892. end else begin
  1893. FromUCS4(cpArray[k],pr[0],pr[1]);
  1894. pr := pr + 2;
  1895. end;
  1896. end;
  1897. if (kc > 0) then
  1898. Dec(pr);
  1899. end else begin
  1900. if (pu^.DecompositionID = -1) then begin
  1901. pr^ := pp^;
  1902. if locIsSurrogate then begin
  1903. Inc(pp);
  1904. Inc(pr);
  1905. Inc(i);
  1906. pr^ := pp^;
  1907. end;
  1908. end else begin
  1909. k := Decompose(pu^.DecompositionID,pr);
  1910. pr := pr + (k - 1);
  1911. if locIsSurrogate then begin
  1912. Inc(pp);
  1913. Inc(i);
  1914. end;
  1915. end;
  1916. end;
  1917. Inc(pp);
  1918. Inc(pr);
  1919. Inc(i);
  1920. end;
  1921. Dec(pp);
  1922. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  1923. SetLength(Result,i);
  1924. CanonicalOrder(@Result[1],Length(Result));
  1925. end;
  1926. end;
  1927. { TUCA_PropItemContextTreeNodeRec }
  1928. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  1929. begin
  1930. if (Self.Left = 0) then
  1931. Result := nil
  1932. else
  1933. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  1934. end;
  1935. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  1936. begin
  1937. if (Self.Right = 0) then
  1938. Result := nil
  1939. else
  1940. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  1941. end;
  1942. { TUCA_PropItemContextRec }
  1943. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  1944. begin
  1945. Result := PUInt24(
  1946. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  1947. SizeOf(Self.WeightCount)
  1948. );
  1949. end;
  1950. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  1951. begin
  1952. Result := PUCA_PropWeights(
  1953. PtrUInt(@Self) +
  1954. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  1955. (Self.CodePointCount*SizeOf(UInt24))
  1956. );
  1957. end;
  1958. { TUCA_PropItemContextTreeRec }
  1959. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  1960. begin
  1961. if (Size = 0) then
  1962. Result := nil
  1963. else
  1964. Result := PUCA_PropItemContextTreeNodeRec(
  1965. PtrUInt(
  1966. PtrUInt(@Self) + SizeOf(UInt24){Size}
  1967. )
  1968. );
  1969. end;
  1970. function CompareCodePoints(
  1971. A : PUInt24; LA : Integer;
  1972. B : PUInt24; LB : Integer
  1973. ) : Integer;
  1974. var
  1975. i, hb : Integer;
  1976. begin
  1977. if (A = B) then
  1978. exit(0);
  1979. Result := 1;
  1980. hb := LB - 1;
  1981. for i := 0 to LA - 1 do begin
  1982. if (i > hb) then
  1983. exit;
  1984. if (A[i] < B[i]) then
  1985. exit(-1);
  1986. if (A[i] > B[i]) then
  1987. exit(1);
  1988. end;
  1989. if (LA = LB) then
  1990. exit(0);
  1991. exit(-1);
  1992. end;
  1993. function TUCA_PropItemContextTreeRec.Find(
  1994. const AChars : PUInt24;
  1995. const ACharCount : Integer;
  1996. out ANode : PUCA_PropItemContextTreeNodeRec
  1997. ) : Boolean;
  1998. var
  1999. t : PUCA_PropItemContextTreeNodeRec;
  2000. begin
  2001. t := Data;
  2002. while (t <> nil) do begin
  2003. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  2004. 0 : Break;
  2005. -1 : t := t^.GetLeftNode();
  2006. else
  2007. t := t^.GetRightNode();
  2008. end;
  2009. end;
  2010. Result := (t <> nil);
  2011. if Result then
  2012. ANode := t;
  2013. end;
  2014. { TUC_Prop }
  2015. function TUC_Prop.GetCategory: Byte;
  2016. begin
  2017. Result := Byte((CategoryData and Byte($F8)) shr 3);
  2018. end;
  2019. function TUC_Prop.GetNumericValue: Double;
  2020. begin
  2021. Result := UC_NUMERIC_ARRAY[NumericIndex];
  2022. end;
  2023. procedure TUC_Prop.SetCategory(AValue: Byte);
  2024. begin
  2025. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  2026. end;
  2027. function TUC_Prop.GetWhiteSpace: Boolean;
  2028. begin
  2029. Result := IsBitON(CategoryData,0);
  2030. end;
  2031. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  2032. begin
  2033. SetBit(CategoryData,0,AValue);
  2034. end;
  2035. function TUC_Prop.GetHangulSyllable: Boolean;
  2036. begin
  2037. Result := IsBitON(CategoryData,1);
  2038. end;
  2039. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  2040. begin
  2041. SetBit(CategoryData,1,AValue);
  2042. end;
  2043. { TUCA_DataBook }
  2044. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  2045. begin
  2046. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  2047. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  2048. end;
  2049. { TUCA_PropItemRec }
  2050. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  2051. begin
  2052. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  2053. end;
  2054. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  2055. begin
  2056. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  2057. end;
  2058. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  2059. begin
  2060. if HasCodePoint() then begin
  2061. if Contextual then
  2062. Result := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(
  2063. PUInt24(
  2064. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  2065. Cardinal(GetContext()^.Size)
  2066. )^
  2067. )
  2068. else
  2069. Result := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(
  2070. PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
  2071. )
  2072. end else begin
  2073. {$ifdef uni_debug}
  2074. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  2075. {$else uni_debug}
  2076. Result := ZERO_UINT24;
  2077. {$endif uni_debug}
  2078. end
  2079. end;
  2080. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  2081. begin
  2082. Result := IsBitON(Flags,FLAG_CODEPOINT);
  2083. end;
  2084. function TUCA_PropItemRec.IsValid() : Boolean;
  2085. begin
  2086. Result := IsBitON(Flags,FLAG_VALID);
  2087. end;
  2088. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  2089. begin
  2090. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  2091. end;}
  2092. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  2093. var
  2094. c : Integer;
  2095. p : PByte;
  2096. pd : PUCA_PropWeights;
  2097. begin
  2098. c := WeightLength;
  2099. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  2100. pd := ADest;
  2101. pd^.Weights[0] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
  2102. p := p + 2;
  2103. if not IsWeightCompress_1() then begin
  2104. pd^.Weights[1] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
  2105. p := p + 2;
  2106. end else begin
  2107. pd^.Weights[1] := p^;
  2108. p := p + 1;
  2109. end;
  2110. if not IsWeightCompress_2() then begin
  2111. pd^.Weights[2] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
  2112. p := p + 2;
  2113. end else begin
  2114. pd^.Weights[2] := p^;
  2115. p := p + 1;
  2116. end;
  2117. if (c > 1) then
  2118. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  2119. end;
  2120. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  2121. begin
  2122. Result := SizeOf(TUCA_PropItemRec);
  2123. if (WeightLength > 0) then begin
  2124. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  2125. if IsWeightCompress_1() then
  2126. Result := Result - 1;
  2127. if IsWeightCompress_2() then
  2128. Result := Result - 1;
  2129. end;
  2130. if HasCodePoint() then
  2131. Result := Result + SizeOf(UInt24);
  2132. if Contextual then
  2133. Result := Result + Cardinal(GetContext()^.Size);
  2134. end;
  2135. function TUCA_PropItemRec.GetContextual: Boolean;
  2136. begin
  2137. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  2138. end;
  2139. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  2140. var
  2141. p : PtrUInt;
  2142. begin
  2143. if not Contextual then
  2144. exit(nil);
  2145. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  2146. if IsBitON(Flags,FLAG_CODEPOINT) then
  2147. p := p + SizeOf(UInt24);
  2148. Result := PUCA_PropItemContextTreeRec(p);
  2149. end;
  2150. function TUCA_PropItemRec.IsDeleted() : Boolean;
  2151. begin
  2152. Result := IsBitON(Flags,FLAG_DELETION);
  2153. end;
  2154. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  2155. var
  2156. i : Cardinal;
  2157. begin
  2158. if (ABook^.BMP_Table2 = nil) then
  2159. exit(nil);
  2160. i := PUInt24(
  2161. PtrUInt(ABook^.BMP_Table2) +
  2162. ( ((ABook^.BMP_Table1[Hi(Word(AChar))] * 256) + Lo(Word(AChar))) *
  2163. SizeOf(UInt24)
  2164. )
  2165. )^;
  2166. {i := ABook^.BMP_Table2[
  2167. (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
  2168. Lo(Word(AChar))
  2169. ];}
  2170. if (i > 0) then
  2171. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  2172. else
  2173. Result := nil;
  2174. end;
  2175. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  2176. var
  2177. i : Cardinal;
  2178. begin
  2179. if (ABook^.OBMP_Table2 = nil) then
  2180. exit(nil);
  2181. i := PUInt24(
  2182. PtrUInt(ABook^.OBMP_Table2) +
  2183. ( (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  2184. Word(ALowS) - LOW_SURROGATE_BEGIN
  2185. ) *
  2186. SizeOf(UInt24)
  2187. )^;
  2188. {i := ABook^.OBMP_Table2[
  2189. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  2190. Word(ALowS) - LOW_SURROGATE_BEGIN
  2191. ]; }
  2192. if (i > 0) then
  2193. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  2194. else
  2195. Result := nil;
  2196. end;
  2197. {$include weight_derivation.inc}
  2198. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  2199. var
  2200. bb : TUCASortKey;
  2201. begin
  2202. SetLength(bb,Length(B));
  2203. if (Length(bb) > 0) then
  2204. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  2205. Result := CompareSortKey(A,bb);
  2206. end;
  2207. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  2208. var
  2209. i, hb : Integer;
  2210. begin
  2211. if (Pointer(A) = Pointer(B)) then
  2212. exit(0);
  2213. Result := 1;
  2214. hb := Length(B) - 1;
  2215. for i := 0 to Length(A) - 1 do begin
  2216. if (i > hb) then
  2217. exit;
  2218. if (A[i] < B[i]) then
  2219. exit(-1);
  2220. if (A[i] > B[i]) then
  2221. exit(1);
  2222. end;
  2223. if (Length(A) = Length(B)) then
  2224. exit(0);
  2225. exit(-1);
  2226. end;
  2227. type
  2228. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  2229. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  2230. var
  2231. r : TUCASortKey;
  2232. i, c, k, ral, levelCount : Integer;
  2233. pce : PUCA_PropWeights;
  2234. begin
  2235. c := Length(ACEList);
  2236. if (c = 0) then
  2237. exit(nil);
  2238. levelCount := Length(ACEList[0].Weights);
  2239. if (ACollation^.ComparisonStrength > 0) and
  2240. (ACollation^.ComparisonStrength < levelCount)
  2241. then begin
  2242. levelCount := ACollation^.ComparisonStrength;
  2243. end;
  2244. SetLength(r,(levelCount*c + levelCount));
  2245. ral := 0;
  2246. for i := 0 to levelCount - 1 do begin
  2247. if not ACollation^.Backwards[i] then begin
  2248. pce := @ACEList[0];
  2249. for k := 0 to c - 1 do begin
  2250. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  2251. r[ral] := pce^.Weights[i];
  2252. ral := ral + 1;
  2253. end;
  2254. pce := pce + 1;
  2255. end;
  2256. end else begin
  2257. pce := @ACEList[c-1];
  2258. for k := 0 to c - 1 do begin
  2259. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  2260. r[ral] := pce^.Weights[i];
  2261. ral := ral + 1;
  2262. end;
  2263. pce := pce - 1;
  2264. end;
  2265. end;
  2266. r[ral] := 0;
  2267. ral := ral + 1;
  2268. end;
  2269. ral := ral - 1;
  2270. SetLength(r,ral);
  2271. Result := r;
  2272. end;
  2273. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  2274. var
  2275. r : TUCASortKey;
  2276. i, c, k, ral, levelCount : Integer;
  2277. pce : PUCA_PropWeights;
  2278. begin
  2279. c := Length(ACEList);
  2280. if (c = 0) then
  2281. exit(nil);
  2282. levelCount := Length(ACEList[0].Weights);
  2283. if (ACollation^.ComparisonStrength > 0) and
  2284. (ACollation^.ComparisonStrength < levelCount)
  2285. then begin
  2286. levelCount := ACollation^.ComparisonStrength;
  2287. end;
  2288. SetLength(r,(levelCount*c + levelCount));
  2289. ral := 0;
  2290. for i := 0 to levelCount - 1 do begin
  2291. if not ACollation^.Backwards[i] then begin
  2292. pce := @ACEList[0];
  2293. for k := 0 to c - 1 do begin
  2294. if (pce^.Weights[i] <> 0) then begin
  2295. r[ral] := pce^.Weights[i];
  2296. ral := ral + 1;
  2297. end;
  2298. pce := pce + 1;
  2299. end;
  2300. end else begin
  2301. pce := @ACEList[c-1];
  2302. for k := 0 to c - 1 do begin
  2303. if (pce^.Weights[i] <> 0) then begin
  2304. r[ral] := pce^.Weights[i];
  2305. ral := ral + 1;
  2306. end;
  2307. pce := pce - 1;
  2308. end;
  2309. end;
  2310. r[ral] := 0;
  2311. ral := ral + 1;
  2312. end;
  2313. ral := ral - 1;
  2314. SetLength(r,ral);
  2315. Result := r;
  2316. end;
  2317. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  2318. var
  2319. r : TUCASortKey;
  2320. i, c, k, ral, levelCount : Integer;
  2321. pce : PUCA_PropWeights;
  2322. variableState : Boolean;
  2323. begin
  2324. c := Length(ACEList);
  2325. if (c = 0) then
  2326. exit(nil);
  2327. levelCount := Length(ACEList[0].Weights);
  2328. if (ACollation^.ComparisonStrength > 0) and
  2329. (ACollation^.ComparisonStrength < levelCount)
  2330. then begin
  2331. levelCount := ACollation^.ComparisonStrength;
  2332. end;
  2333. SetLength(r,(levelCount*c + levelCount));
  2334. ral := 0;
  2335. variableState := False;
  2336. for i := 0 to levelCount - 1 do begin
  2337. if not ACollation^.Backwards[i] then begin
  2338. variableState := False;
  2339. pce := @ACEList[0];
  2340. for k := 0 to c - 1 do begin
  2341. if not ACollation^.IsVariable(pce) then begin
  2342. if (pce^.Weights[0] <> 0) then
  2343. variableState := False;
  2344. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  2345. r[ral] := pce^.Weights[i];
  2346. ral := ral + 1;
  2347. end;
  2348. end else begin
  2349. variableState := True;
  2350. end;
  2351. pce := pce + 1;
  2352. end;
  2353. end else begin
  2354. pce := @ACEList[c-1];
  2355. for k := 0 to c - 1 do begin
  2356. if not ACollation^.IsVariable(pce) then begin
  2357. if (pce^.Weights[0] <> 0) then
  2358. variableState := False;
  2359. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  2360. r[ral] := pce^.Weights[i];
  2361. ral := ral + 1;
  2362. end;
  2363. end else begin
  2364. variableState := True;
  2365. end;
  2366. pce := pce - 1;
  2367. end;
  2368. end;
  2369. r[ral] := 0;
  2370. ral := ral + 1;
  2371. end;
  2372. ral := ral - 1;
  2373. SetLength(r,ral);
  2374. Result := r;
  2375. end;
  2376. function FormKeyShiftedTrimmed(
  2377. const ACEList : TUCA_PropWeightsArray;
  2378. const ACollation : PUCA_DataBook
  2379. ) : TUCASortKey;
  2380. var
  2381. i : Integer;
  2382. p : ^TUCASortKeyItem;
  2383. begin
  2384. Result := FormKeyShifted(ACEList,ACollation);
  2385. i := Length(Result) - 1;
  2386. if (i >= 0) then begin
  2387. p := @Result[i];
  2388. while (i >= 0) do begin
  2389. if (p^ <> $FFFF) then
  2390. Break;
  2391. Dec(i);
  2392. Dec(p);
  2393. end;
  2394. if ((i+1) < Length(Result)) then
  2395. SetLength(Result,(i+1));
  2396. end;
  2397. end;
  2398. function FindChild(
  2399. const ACodePoint : Cardinal;
  2400. const AParent : PUCA_PropItemRec
  2401. ) : PUCA_PropItemRec;inline;
  2402. var
  2403. k : Integer;
  2404. begin
  2405. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  2406. for k := 0 to AParent^.ChildCount - 1 do begin
  2407. if (ACodePoint = Result^.CodePoint) then
  2408. exit;
  2409. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  2410. end;
  2411. Result := nil;
  2412. end;
  2413. function ComputeSortKey(
  2414. const AString : UnicodeString;
  2415. const ACollation : PUCA_DataBook
  2416. ) : TUCASortKey;
  2417. begin
  2418. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  2419. end;
  2420. function ComputeRawSortKey(
  2421. const AStr : PUnicodeChar;
  2422. const ALength : SizeInt;
  2423. const ACollation : PUCA_DataBook
  2424. ) : TUCA_PropWeightsArray;
  2425. var
  2426. r : TUCA_PropWeightsArray;
  2427. ral {used length of "r"}: Integer;
  2428. rl {capacity of "r"} : Integer;
  2429. procedure GrowKey(const AMinGrow : Integer = 0);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2430. begin
  2431. if (rl < AMinGrow) then
  2432. rl := rl + AMinGrow
  2433. else
  2434. rl := 2 * rl;
  2435. SetLength(r,rl);
  2436. end;
  2437. var
  2438. i : Integer;
  2439. s : UnicodeString;
  2440. psBase : PUnicodeChar;
  2441. ps : PUnicodeChar;
  2442. cp : Cardinal;
  2443. cl : PUCA_DataBook;
  2444. pp : PUCA_PropItemRec;
  2445. ppLevel : Byte;
  2446. removedCharIndex : array of DWord;
  2447. removedCharIndexLength : DWord;
  2448. locHistory : array[0..24] of record
  2449. i : Integer;
  2450. cl : PUCA_DataBook;
  2451. pp : PUCA_PropItemRec;
  2452. ppLevel : Byte;
  2453. cp : Cardinal;
  2454. removedCharIndexLength : DWord;
  2455. end;
  2456. locHistoryTop : Integer;
  2457. suppressState : record
  2458. cl : PUCA_DataBook;
  2459. CharCount : Integer;
  2460. end;
  2461. LastKeyOwner : record
  2462. Length : Integer;
  2463. Chars : array[0..24] of UInt24;
  2464. end;
  2465. procedure SaveKeyOwner();
  2466. var
  2467. k : Integer;
  2468. kppLevel : Byte;
  2469. begin
  2470. k := 0;
  2471. kppLevel := High(Byte);
  2472. while (k <= locHistoryTop) do begin
  2473. if (kppLevel <> locHistory[k].ppLevel) then begin
  2474. LastKeyOwner.Chars[k] := locHistory[k].cp;
  2475. kppLevel := locHistory[k].ppLevel;
  2476. end;
  2477. k := k + 1;
  2478. end;
  2479. if (k = 0) or (kppLevel <> ppLevel) then begin
  2480. LastKeyOwner.Chars[k] := cp;
  2481. k := k + 1;
  2482. end;
  2483. LastKeyOwner.Length := k;
  2484. end;
  2485. procedure AddWeights(AItem : PUCA_PropItemRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2486. begin
  2487. SaveKeyOwner();
  2488. if ((ral + AItem^.WeightLength) > rl) then
  2489. GrowKey(AItem^.WeightLength);
  2490. AItem^.GetWeightArray(@r[ral]);
  2491. ral := ral + AItem^.WeightLength;
  2492. end;
  2493. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2494. begin
  2495. if ((ral + AItem^.WeightCount) > rl) then
  2496. GrowKey(AItem^.WeightCount);
  2497. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  2498. ral := ral + AItem^.WeightCount;
  2499. end;
  2500. procedure AddComputedWeights(ACodePoint : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2501. begin
  2502. SaveKeyOwner();
  2503. if ((ral + 2) > rl) then
  2504. GrowKey();
  2505. DeriveWeight(ACodePoint,@r[ral]);
  2506. ral := ral + 2;
  2507. end;
  2508. procedure RecordDeletion();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2509. begin
  2510. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  2511. if (suppressState.cl = nil) or
  2512. (suppressState.CharCount > ppLevel)
  2513. then begin
  2514. suppressState.cl := cl;
  2515. suppressState.CharCount := ppLevel;
  2516. end;
  2517. end;
  2518. end;
  2519. procedure RecordStep();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2520. begin
  2521. Inc(locHistoryTop);
  2522. locHistory[locHistoryTop].i := i;
  2523. locHistory[locHistoryTop].cl := cl;
  2524. locHistory[locHistoryTop].pp := pp;
  2525. locHistory[locHistoryTop].ppLevel := ppLevel;
  2526. locHistory[locHistoryTop].cp := cp;
  2527. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  2528. RecordDeletion();
  2529. end;
  2530. procedure ClearHistory();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2531. begin
  2532. locHistoryTop := -1;
  2533. end;
  2534. function HasHistory() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2535. begin
  2536. Result := (locHistoryTop >= 0);
  2537. end;
  2538. function GetHistoryLength() : Integer;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2539. begin
  2540. Result := (locHistoryTop + 1);
  2541. end;
  2542. procedure GoBack();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2543. begin
  2544. Assert(locHistoryTop >= 0);
  2545. i := locHistory[locHistoryTop].i;
  2546. cp := locHistory[locHistoryTop].cp;
  2547. cl := locHistory[locHistoryTop].cl;
  2548. pp := locHistory[locHistoryTop].pp;
  2549. ppLevel := locHistory[locHistoryTop].ppLevel;
  2550. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  2551. ps := psBase + i;
  2552. Dec(locHistoryTop);
  2553. end;
  2554. var
  2555. c : Integer;
  2556. lastUnblockedNonstarterCCC : Byte;
  2557. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  2558. var
  2559. k : DWord;
  2560. pk : PUnicodeChar;
  2561. puk : PUC_Prop;
  2562. begin
  2563. k := AStartFrom;
  2564. if (k > c) then
  2565. exit(False);
  2566. if (removedCharIndexLength>0) and
  2567. (IndexInArrayDWord(removedCharIndex,k) >= 0)
  2568. then begin
  2569. exit(False);
  2570. end;
  2571. {if (k = (i+1)) or
  2572. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  2573. then
  2574. lastUnblockedNonstarterCCC := 0;}
  2575. pk := psBase + k-1;
  2576. if UnicodeIsHighSurrogate(pk^) then begin
  2577. if (k = c) then
  2578. exit(False);
  2579. if UnicodeIsLowSurrogate(pk[1]) then
  2580. puk := GetProps(pk[0],pk[1])
  2581. else
  2582. puk := GetProps(Word(pk^));
  2583. end else begin
  2584. puk := GetProps(Word(pk^));
  2585. end;
  2586. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  2587. exit(False);
  2588. lastUnblockedNonstarterCCC := puk^.CCC;
  2589. Result := True;
  2590. end;
  2591. procedure RemoveChar(APos : Integer);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2592. begin
  2593. if (removedCharIndexLength >= Length(removedCharIndex)) then
  2594. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  2595. removedCharIndex[removedCharIndexLength] := APos;
  2596. Inc(removedCharIndexLength);
  2597. if UnicodeIsHighSurrogate(psBase[APos]) and (APos < c) and UnicodeIsLowSurrogate(psBase[APos+1]) then begin
  2598. if (removedCharIndexLength >= Length(removedCharIndex)) then
  2599. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  2600. removedCharIndex[removedCharIndexLength] := APos+1;
  2601. Inc(removedCharIndexLength);
  2602. end;
  2603. end;
  2604. procedure Inc_I();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2605. begin
  2606. if (removedCharIndexLength = 0) then begin
  2607. Inc(i);
  2608. Inc(ps);
  2609. exit;
  2610. end;
  2611. while True do begin
  2612. Inc(i);
  2613. Inc(ps);
  2614. if (IndexInArrayDWord(removedCharIndex,i) = -1) then
  2615. Break;
  2616. end;
  2617. end;
  2618. var
  2619. surrogateState : Boolean;
  2620. function MoveToNextChar() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2621. begin
  2622. Result := True;
  2623. if UnicodeIsHighSurrogate(ps[0]) then begin
  2624. if (i = c) then
  2625. exit(False);
  2626. if UnicodeIsLowSurrogate(ps[1]) then begin
  2627. surrogateState := True;
  2628. cp := ToUCS4(ps[0],ps[1]);
  2629. end else begin
  2630. surrogateState := False;
  2631. cp := Word(ps[0]);
  2632. end;
  2633. end else begin
  2634. surrogateState := False;
  2635. cp := Word(ps[0]);
  2636. end;
  2637. end;
  2638. procedure ClearPP(const AClearSuppressInfo : Boolean = True);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2639. begin
  2640. cl := nil;
  2641. pp := nil;
  2642. ppLevel := 0;
  2643. if AClearSuppressInfo then begin
  2644. suppressState.cl := nil;
  2645. suppressState.CharCount := 0;
  2646. end;
  2647. end;
  2648. function FindPropUCA() : Boolean;
  2649. var
  2650. candidateCL : PUCA_DataBook;
  2651. begin
  2652. pp := nil;
  2653. if (cl = nil) then
  2654. candidateCL := ACollation
  2655. else
  2656. candidateCL := cl;
  2657. if surrogateState then begin
  2658. while (candidateCL <> nil) do begin
  2659. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  2660. if (pp <> nil) then
  2661. break;
  2662. candidateCL := candidateCL^.Base;
  2663. end;
  2664. end else begin
  2665. while (candidateCL <> nil) do begin
  2666. pp := GetPropUCA(ps[0],candidateCL);
  2667. if (pp <> nil) then
  2668. break;
  2669. candidateCL := candidateCL^.Base;
  2670. end;
  2671. end;
  2672. cl := candidateCL;
  2673. Result := (pp <> nil);
  2674. end;
  2675. procedure AddWeightsAndClear();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2676. var
  2677. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2678. begin
  2679. if (pp^.WeightLength > 0) then begin
  2680. AddWeights(pp);
  2681. end else
  2682. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2683. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2684. (ctxNode^.Data.WeightCount > 0)
  2685. then begin
  2686. AddContextWeights(@ctxNode^.Data);
  2687. end;
  2688. //AddWeights(pp);
  2689. ClearHistory();
  2690. ClearPP();
  2691. end;
  2692. procedure StartMatch();
  2693. procedure HandleLastChar();
  2694. var
  2695. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2696. begin
  2697. while True do begin
  2698. if pp^.IsValid() then begin
  2699. if (pp^.WeightLength > 0) then
  2700. AddWeights(pp)
  2701. else
  2702. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2703. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2704. (ctxNode^.Data.WeightCount > 0)
  2705. then
  2706. AddContextWeights(@ctxNode^.Data)
  2707. else
  2708. AddComputedWeights(cp){handle deletion of code point};
  2709. break;
  2710. end;
  2711. if (cl^.Base = nil) then begin
  2712. AddComputedWeights(cp);
  2713. break;
  2714. end;
  2715. cl := cl^.Base;
  2716. if not FindPropUCA() then begin
  2717. AddComputedWeights(cp);
  2718. break;
  2719. end;
  2720. end;
  2721. end;
  2722. var
  2723. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2724. begin
  2725. ppLevel := 0;
  2726. if not FindPropUCA() then begin
  2727. AddComputedWeights(cp);
  2728. ClearHistory();
  2729. ClearPP();
  2730. end else begin
  2731. if (i = c) then begin
  2732. HandleLastChar();
  2733. end else begin
  2734. if pp^.IsValid()then begin
  2735. if (pp^.ChildCount = 0) then begin
  2736. if (pp^.WeightLength > 0) then
  2737. AddWeights(pp)
  2738. else
  2739. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2740. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  2741. (tmpCtxNode^.Data.WeightCount > 0)
  2742. then
  2743. AddContextWeights(@tmpCtxNode^.Data)
  2744. else
  2745. AddComputedWeights(cp){handle deletion of code point};
  2746. ClearPP();
  2747. ClearHistory();
  2748. end else begin
  2749. RecordStep();
  2750. end
  2751. end else begin
  2752. if (pp^.ChildCount = 0) then begin
  2753. AddComputedWeights(cp);
  2754. ClearPP();
  2755. ClearHistory();
  2756. end else begin
  2757. RecordStep();
  2758. end;
  2759. end ;
  2760. end;
  2761. end;
  2762. end;
  2763. function TryPermutation() : Boolean;
  2764. var
  2765. kk, kkidx : Integer;
  2766. b : Boolean;
  2767. puk : PUC_Prop;
  2768. ppk : PUCA_PropItemRec;
  2769. begin
  2770. Result := False;
  2771. puk := GetProps(cp);
  2772. if (puk^.CCC = 0) then
  2773. exit;
  2774. lastUnblockedNonstarterCCC := puk^.CCC;
  2775. if surrogateState then
  2776. kk := i + 2
  2777. else
  2778. kk := i + 1;
  2779. while IsUnblockedNonstarter(kk) do begin
  2780. kkidx := kk-1;
  2781. b := UnicodeIsHighSurrogate(psBase[kkidx]) and (kk<c) and UnicodeIsLowSurrogate(psBase[kkidx+1]);
  2782. if b then
  2783. ppk := FindChild(ToUCS4(psBase[kkidx],psBase[kkidx+1]),pp)
  2784. else
  2785. ppk := FindChild(Word(psBase[kkidx]),pp);
  2786. if (ppk <> nil) then begin
  2787. pp := ppk;
  2788. RemoveChar(kk);
  2789. Inc(ppLevel);
  2790. RecordStep();
  2791. Result := True;
  2792. if (pp^.ChildCount = 0 ) then
  2793. Break;
  2794. end;
  2795. if b then
  2796. Inc(kk);
  2797. Inc(kk);
  2798. end;
  2799. end;
  2800. procedure AdvanceCharPos();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  2801. begin
  2802. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  2803. Inc(i);
  2804. Inc(ps);
  2805. end;
  2806. Inc_I();
  2807. end;
  2808. var
  2809. ok : Boolean;
  2810. pp1 : PUCA_PropItemRec;
  2811. cltemp : PUCA_DataBook;
  2812. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2813. begin
  2814. if (ALength = 0) then
  2815. exit(nil);
  2816. s := '';
  2817. if ACollation^.NoNormalization then begin
  2818. psBase := AStr;
  2819. c := ALength;
  2820. end else begin
  2821. s := NormalizeNFD(AStr,ALength);
  2822. c := Length(s);
  2823. psBase := @s[1];
  2824. end;
  2825. rl := 3*c;
  2826. SetLength(r,rl);
  2827. ral := 0;
  2828. ps := psBase;
  2829. ClearPP();
  2830. locHistoryTop := -1;
  2831. removedCharIndexLength := 0;
  2832. FillChar(suppressState,SizeOf(suppressState),#0);
  2833. LastKeyOwner.Length := 0;
  2834. i := 1;
  2835. while (i <= c) and MoveToNextChar() do begin
  2836. if (pp = nil) then begin // Start Matching
  2837. StartMatch();
  2838. end else begin
  2839. pp1 := FindChild(cp,pp);
  2840. if (pp1 <> nil) then begin
  2841. Inc(ppLevel);
  2842. pp := pp1;
  2843. if (pp^.ChildCount = 0) or (i = c) then begin
  2844. ok := False;
  2845. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2846. if (pp^.WeightLength > 0) then begin
  2847. AddWeightsAndClear();
  2848. ok := True;
  2849. end else
  2850. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2851. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2852. (ctxNode^.Data.WeightCount > 0)
  2853. then begin
  2854. AddContextWeights(@ctxNode^.Data);
  2855. ClearHistory();
  2856. ClearPP();
  2857. ok := True;
  2858. end
  2859. end;
  2860. if not ok then begin
  2861. RecordDeletion();
  2862. ok := False;
  2863. while HasHistory() do begin
  2864. GoBack();
  2865. if pp^.IsValid() and
  2866. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2867. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2868. )
  2869. then begin
  2870. AddWeightsAndClear();
  2871. ok := True;
  2872. Break;
  2873. end;
  2874. end;
  2875. if not ok then begin
  2876. cltemp := cl^.Base;
  2877. if (cltemp <> nil) then begin
  2878. ClearPP(False);
  2879. cl := cltemp;
  2880. Continue;
  2881. end;
  2882. end;
  2883. if not ok then begin
  2884. AddComputedWeights(cp);
  2885. ClearHistory();
  2886. ClearPP();
  2887. end;
  2888. end;
  2889. end else begin
  2890. RecordStep();
  2891. end;
  2892. end else begin
  2893. // permutations !
  2894. ok := False;
  2895. if TryPermutation() and pp^.IsValid() then begin
  2896. if (suppressState.CharCount = 0) then begin
  2897. AddWeightsAndClear();
  2898. Continue;
  2899. end;
  2900. while True do begin
  2901. if pp^.IsValid() and
  2902. (pp^.WeightLength > 0) and
  2903. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2904. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2905. )
  2906. then begin
  2907. AddWeightsAndClear();
  2908. ok := True;
  2909. break;
  2910. end;
  2911. if not HasHistory() then
  2912. break;
  2913. GoBack();
  2914. if (pp = nil) then
  2915. break;
  2916. end;
  2917. end;
  2918. if not ok then begin
  2919. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2920. if (pp^.WeightLength > 0) then begin
  2921. AddWeightsAndClear();
  2922. ok := True;
  2923. end else
  2924. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2925. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2926. (ctxNode^.Data.WeightCount > 0)
  2927. then begin
  2928. AddContextWeights(@ctxNode^.Data);
  2929. ClearHistory();
  2930. ClearPP();
  2931. ok := True;
  2932. end
  2933. end;
  2934. if ok then
  2935. Continue;
  2936. end;
  2937. if not ok then begin
  2938. if (cl^.Base <> nil) then begin
  2939. cltemp := cl^.Base;
  2940. while HasHistory() do
  2941. GoBack();
  2942. pp := nil;
  2943. ppLevel := 0;
  2944. cl := cltemp;
  2945. Continue;
  2946. end;
  2947. //walk back
  2948. ok := False;
  2949. while HasHistory() do begin
  2950. GoBack();
  2951. if pp^.IsValid() and
  2952. (pp^.WeightLength > 0) and
  2953. ( (suppressState.CharCount = 0) or
  2954. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2955. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2956. )
  2957. )
  2958. then begin
  2959. AddWeightsAndClear();
  2960. ok := True;
  2961. Break;
  2962. end;
  2963. end;
  2964. if ok then begin
  2965. AdvanceCharPos();
  2966. Continue;
  2967. end;
  2968. if (pp <> nil) then begin
  2969. AddComputedWeights(cp);
  2970. ClearHistory();
  2971. ClearPP();
  2972. end;
  2973. end;
  2974. end;
  2975. end;
  2976. if surrogateState then begin
  2977. Inc(ps);
  2978. Inc(i);
  2979. end;
  2980. //
  2981. Inc_I();
  2982. end;
  2983. SetLength(r,ral);
  2984. Result := r;
  2985. end;
  2986. type
  2987. TComputeKeyContext = record
  2988. Collation : PUCA_DataBook;
  2989. r : TUCA_PropWeightsArray;
  2990. ral {used length of "r"}: Integer;
  2991. rl {capacity of "r"} : Integer;
  2992. i : Integer;
  2993. s : UnicodeString;
  2994. ps : PUnicodeChar;
  2995. cp : Cardinal;
  2996. cl : PUCA_DataBook;
  2997. pp : PUCA_PropItemRec;
  2998. ppLevel : Byte;
  2999. removedCharIndex : array of DWord;
  3000. removedCharIndexLength : DWord;
  3001. locHistoryTop : Integer;
  3002. locHistory : array[0..24] of record
  3003. i : Integer;
  3004. cl : PUCA_DataBook;
  3005. pp : PUCA_PropItemRec;
  3006. ppLevel : Byte;
  3007. cp : Cardinal;
  3008. removedCharIndexLength : DWord;
  3009. end;
  3010. suppressState : record
  3011. cl : PUCA_DataBook;
  3012. CharCount : Integer;
  3013. end;
  3014. LastKeyOwner : record
  3015. Length : Integer;
  3016. Chars : array[0..24] of UInt24;
  3017. end;
  3018. c : Integer;
  3019. lastUnblockedNonstarterCCC : Byte;
  3020. surrogateState : Boolean;
  3021. Finished : Boolean;
  3022. end;
  3023. PComputeKeyContext = ^TComputeKeyContext;
  3024. procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
  3025. begin
  3026. AContext^.cl := nil;
  3027. AContext^.pp := nil;
  3028. AContext^.ppLevel := 0;
  3029. if AClearSuppressInfo then begin
  3030. AContext^.suppressState.cl := nil;
  3031. AContext^.suppressState.CharCount := 0;
  3032. end;
  3033. end;
  3034. procedure InitContext(
  3035. AContext : PComputeKeyContext;
  3036. const AStr : PUnicodeChar;
  3037. const ALength : SizeInt;
  3038. const ACollation : PUCA_DataBook
  3039. );
  3040. begin
  3041. AContext^.Collation := ACollation;
  3042. AContext^.c := ALength;
  3043. AContext^.s := NormalizeNFD(AStr,AContext^.c);
  3044. AContext^.c := Length(AContext^.s);
  3045. AContext^.rl := 3*AContext^.c;
  3046. SetLength(AContext^.r,AContext^.rl);
  3047. AContext^.ral := 0;
  3048. AContext^.ps := @AContext^.s[1];
  3049. ClearPP(AContext);
  3050. AContext^.locHistoryTop := -1;
  3051. AContext^.removedCharIndexLength := 0;
  3052. FillChar(AContext^.suppressState,SizeOf(AContext^.suppressState),#0);
  3053. AContext^.LastKeyOwner.Length := 0;
  3054. AContext^.i := 1;
  3055. AContext^.Finished := False;
  3056. end;
  3057. function FormKey(
  3058. const AWeightArray : TUCA_PropWeightsArray;
  3059. const ACollation : PUCA_DataBook
  3060. ) : TUCASortKey;inline;
  3061. begin
  3062. case ACollation.VariableWeight of
  3063. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
  3064. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
  3065. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
  3066. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
  3067. else
  3068. Result := FormKeyShifted(AWeightArray,ACollation);
  3069. end;
  3070. end;
  3071. function ComputeRawSortKeyNextItem(
  3072. const AContext : PComputeKeyContext
  3073. ) : Boolean;forward;
  3074. function IncrementalCompareString_NonIgnorable(
  3075. const AStrA : PUnicodeChar;
  3076. const ALengthA : SizeInt;
  3077. const AStrB : PUnicodeChar;
  3078. const ALengthB : SizeInt;
  3079. const ACollation : PUCA_DataBook
  3080. ) : Integer;
  3081. var
  3082. ctxA, ctxB : TComputeKeyContext;
  3083. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  3084. keyIndexB : Integer;
  3085. keyA, keyB : TUCASortKey;
  3086. begin
  3087. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  3088. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  3089. (ALengthA = ALengthB)
  3090. )
  3091. then
  3092. exit(0);
  3093. if (ALengthA = 0) then
  3094. exit(-1);
  3095. if (ALengthB = 0) then
  3096. exit(1);
  3097. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  3098. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  3099. lastKeyIndexA := -1;
  3100. keyIndexA := -1;
  3101. lengthMaxA := 0;
  3102. keyIndexB := -1;
  3103. while True do begin
  3104. if not ComputeRawSortKeyNextItem(@ctxA) then
  3105. Break;
  3106. if (ctxA.ral = lengthMaxA) then
  3107. Continue;
  3108. lengthMaxA := ctxA.ral;
  3109. keyIndexA := lastKeyIndexA + 1;
  3110. while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
  3111. Inc(keyIndexA);
  3112. end;
  3113. if (keyIndexA = lengthMaxA) then begin
  3114. lastKeyIndexA := keyIndexA-1;
  3115. Continue;
  3116. end;
  3117. while (keyIndexA < lengthMaxA) do begin
  3118. if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
  3119. Inc(keyIndexA);
  3120. Continue;
  3121. end;
  3122. Inc(keyIndexB);
  3123. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  3124. if (ctxB.ral <= keyIndexB) then begin
  3125. if not ComputeRawSortKeyNextItem(@ctxB) then
  3126. Break;
  3127. Continue;
  3128. end;
  3129. Inc(keyIndexB);
  3130. end;
  3131. if (ctxB.ral <= keyIndexB) then
  3132. exit(1);
  3133. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  3134. exit(1);
  3135. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  3136. exit(-1);
  3137. Inc(keyIndexA);
  3138. end;
  3139. lastKeyIndexA := keyIndexA - 1;
  3140. end;
  3141. //Key(A) is completed !
  3142. Inc(keyIndexB);
  3143. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  3144. if (ctxB.ral <= keyIndexB) then begin
  3145. if not ComputeRawSortKeyNextItem(@ctxB) then
  3146. Break;
  3147. Continue;
  3148. end;
  3149. Inc(keyIndexB);
  3150. end;
  3151. if (ctxB.ral > keyIndexB) then begin
  3152. //B has at least one more primary weight that A
  3153. exit(-1);
  3154. end;
  3155. while ComputeRawSortKeyNextItem(@ctxB) do begin
  3156. //
  3157. end;
  3158. //Key(B) is completed !
  3159. keyA := FormKey(ctxA.r,ctxA.Collation);
  3160. keyB := FormKey(ctxB.r,ctxB.Collation);
  3161. Result := CompareSortKey(keyA,keyB);
  3162. end;
  3163. function IncrementalCompareString_Shift(
  3164. const AStrA : PUnicodeChar;
  3165. const ALengthA : SizeInt;
  3166. const AStrB : PUnicodeChar;
  3167. const ALengthB : SizeInt;
  3168. const ACollation : PUCA_DataBook
  3169. ) : Integer;
  3170. var
  3171. ctxA, ctxB : TComputeKeyContext;
  3172. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  3173. keyIndexB : Integer;
  3174. keyA, keyB : TUCASortKey;
  3175. begin
  3176. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  3177. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  3178. (ALengthA = ALengthB)
  3179. )
  3180. then
  3181. exit(0);
  3182. if (ALengthA = 0) then
  3183. exit(-1);
  3184. if (ALengthB = 0) then
  3185. exit(1);
  3186. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  3187. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  3188. lastKeyIndexA := -1;
  3189. keyIndexA := -1;
  3190. lengthMaxA := 0;
  3191. keyIndexB := -1;
  3192. while True do begin
  3193. if not ComputeRawSortKeyNextItem(@ctxA) then
  3194. Break;
  3195. if (ctxA.ral = lengthMaxA) then
  3196. Continue;
  3197. lengthMaxA := ctxA.ral;
  3198. keyIndexA := lastKeyIndexA + 1;
  3199. while (keyIndexA < lengthMaxA) and
  3200. ( (ctxA.r[keyIndexA].Weights[0] = 0) or
  3201. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  3202. )
  3203. do begin
  3204. Inc(keyIndexA);
  3205. end;
  3206. if (keyIndexA = lengthMaxA) then begin
  3207. lastKeyIndexA := keyIndexA-1;
  3208. Continue;
  3209. end;
  3210. while (keyIndexA < lengthMaxA) do begin
  3211. if (ctxA.r[keyIndexA].Weights[0] = 0) or
  3212. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  3213. then begin
  3214. Inc(keyIndexA);
  3215. Continue;
  3216. end;
  3217. Inc(keyIndexB);
  3218. while (ctxB.ral <= keyIndexB) or
  3219. (ctxB.r[keyIndexB].Weights[0] = 0) or
  3220. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  3221. do begin
  3222. if (ctxB.ral <= keyIndexB) then begin
  3223. if not ComputeRawSortKeyNextItem(@ctxB) then
  3224. Break;
  3225. Continue;
  3226. end;
  3227. Inc(keyIndexB);
  3228. end;
  3229. if (ctxB.ral <= keyIndexB) then
  3230. exit(1);
  3231. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  3232. exit(1);
  3233. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  3234. exit(-1);
  3235. Inc(keyIndexA);
  3236. end;
  3237. lastKeyIndexA := keyIndexA - 1;
  3238. end;
  3239. //Key(A) is completed !
  3240. Inc(keyIndexB);
  3241. while (ctxB.ral <= keyIndexB) or
  3242. (ctxB.r[keyIndexB].Weights[0] = 0) or
  3243. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  3244. do begin
  3245. if (ctxB.ral <= keyIndexB) then begin
  3246. if not ComputeRawSortKeyNextItem(@ctxB) then
  3247. Break;
  3248. Continue;
  3249. end;
  3250. Inc(keyIndexB);
  3251. end;
  3252. if (ctxB.ral > keyIndexB) then begin
  3253. //B has at least one more primary weight that A
  3254. exit(-1);
  3255. end;
  3256. while ComputeRawSortKeyNextItem(@ctxB) do begin
  3257. //
  3258. end;
  3259. //Key(B) is completed !
  3260. keyA := FormKey(ctxA.r,ctxA.Collation);
  3261. keyB := FormKey(ctxB.r,ctxB.Collation);
  3262. Result := CompareSortKey(keyA,keyB);
  3263. end;
  3264. function IncrementalCompareString(
  3265. const AStrA : PUnicodeChar;
  3266. const ALengthA : SizeInt;
  3267. const AStrB : PUnicodeChar;
  3268. const ALengthB : SizeInt;
  3269. const ACollation : PUCA_DataBook
  3270. ) : Integer;
  3271. begin
  3272. case ACollation^.VariableWeight of
  3273. TUCA_VariableKind.ucaNonIgnorable :
  3274. begin
  3275. Result := IncrementalCompareString_NonIgnorable(
  3276. AStrA,ALengthA,AStrB,ALengthB,ACollation
  3277. );
  3278. end;
  3279. TUCA_VariableKind.ucaBlanked,
  3280. TUCA_VariableKind.ucaShiftedTrimmed,
  3281. TUCA_VariableKind.ucaIgnoreSP,
  3282. TUCA_VariableKind.ucaShifted:
  3283. begin
  3284. Result := IncrementalCompareString_Shift(
  3285. AStrA,ALengthA,AStrB,ALengthB,ACollation
  3286. );
  3287. end;
  3288. else
  3289. begin
  3290. Result := IncrementalCompareString_Shift(
  3291. AStrA,ALengthA,AStrB,ALengthB,ACollation
  3292. );
  3293. end;
  3294. end;
  3295. end;
  3296. function IncrementalCompareString(
  3297. const AStrA,
  3298. AStrB : UnicodeString;
  3299. const ACollation : PUCA_DataBook
  3300. ) : Integer;
  3301. begin
  3302. Result := IncrementalCompareString(
  3303. Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
  3304. ACollation
  3305. );
  3306. end;
  3307. function FilterString(
  3308. const AStr : PUnicodeChar;
  3309. const ALength : SizeInt;
  3310. const AExcludedMask : TCategoryMask
  3311. ) : UnicodeString;
  3312. var
  3313. i, c : SizeInt;
  3314. pp, pr : PUnicodeChar;
  3315. pu : PUC_Prop;
  3316. locIsSurrogate : Boolean;
  3317. begin
  3318. c := ALength;
  3319. SetLength(Result,(2*c));
  3320. if (c > 0) then begin
  3321. pp := AStr;
  3322. pr := @Result[1];
  3323. i := 1;
  3324. while (i <= c) do begin
  3325. pu := GetProps(Word(pp^));
  3326. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  3327. if locIsSurrogate then begin
  3328. if (i = c) then
  3329. Break;
  3330. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  3331. Inc(pp);
  3332. Inc(i);
  3333. Continue;
  3334. end;
  3335. pu := GetProps(pp[0],pp[1]);
  3336. end;
  3337. if not(pu^.Category in AExcludedMask) then begin
  3338. pr^ := pp^;
  3339. Inc(pr);
  3340. if locIsSurrogate then begin
  3341. Inc(pp);
  3342. Inc(pr);
  3343. Inc(i);
  3344. pr^ := pp^;
  3345. end;
  3346. end;
  3347. Inc(pp);
  3348. Inc(i);
  3349. end;
  3350. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  3351. SetLength(Result,i);
  3352. end;
  3353. end;
  3354. function FilterString(
  3355. const AStr : UnicodeString;
  3356. const AExcludedMask : TCategoryMask
  3357. ) : UnicodeString;
  3358. begin
  3359. if (AStr = '') then
  3360. Result := ''
  3361. else
  3362. Result := FilterString(@AStr[1],Length(AStr),AExcludedMask);
  3363. end;
  3364. function ComputeRawSortKeyNextItem(
  3365. const AContext : PComputeKeyContext
  3366. ) : Boolean;
  3367. var
  3368. ctx : PComputeKeyContext;
  3369. procedure GrowKey(const AMinGrow : Integer = 0);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3370. begin
  3371. if (ctx^.rl < AMinGrow) then
  3372. ctx^.rl := ctx^.rl + AMinGrow
  3373. else
  3374. ctx^.rl := 2 * ctx^.rl;
  3375. SetLength(ctx^.r,ctx^.rl);
  3376. end;
  3377. procedure SaveKeyOwner();
  3378. var
  3379. k : Integer;
  3380. kppLevel : Byte;
  3381. begin
  3382. k := 0;
  3383. kppLevel := High(Byte);
  3384. while (k <= ctx^.locHistoryTop) do begin
  3385. if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
  3386. ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
  3387. kppLevel := ctx^.locHistory[k].ppLevel;
  3388. end;
  3389. k := k + 1;
  3390. end;
  3391. if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
  3392. ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
  3393. k := k + 1;
  3394. end;
  3395. ctx^.LastKeyOwner.Length := k;
  3396. end;
  3397. procedure AddWeights(AItem : PUCA_PropItemRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3398. begin
  3399. SaveKeyOwner();
  3400. if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
  3401. GrowKey(AItem^.WeightLength);
  3402. AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
  3403. ctx^.ral := ctx^.ral + AItem^.WeightLength;
  3404. end;
  3405. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3406. begin
  3407. if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
  3408. GrowKey(AItem^.WeightCount);
  3409. Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
  3410. ctx^.ral := ctx^.ral + AItem^.WeightCount;
  3411. end;
  3412. procedure AddComputedWeights(ACodePoint : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3413. begin
  3414. SaveKeyOwner();
  3415. if ((ctx^.ral + 2) > ctx^.rl) then
  3416. GrowKey();
  3417. DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
  3418. ctx^.ral := ctx^.ral + 2;
  3419. end;
  3420. procedure RecordDeletion();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3421. begin
  3422. if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  3423. if (ctx^.suppressState.cl = nil) or
  3424. (ctx^.suppressState.CharCount > ctx^.ppLevel)
  3425. then begin
  3426. ctx^.suppressState.cl := ctx^.cl;
  3427. ctx^.suppressState.CharCount := ctx^.ppLevel;
  3428. end;
  3429. end;
  3430. end;
  3431. procedure RecordStep();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3432. begin
  3433. Inc(ctx^.locHistoryTop);
  3434. ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
  3435. ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
  3436. ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
  3437. ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
  3438. ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
  3439. ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
  3440. RecordDeletion();
  3441. end;
  3442. procedure ClearHistory();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3443. begin
  3444. ctx^.locHistoryTop := -1;
  3445. end;
  3446. function HasHistory() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3447. begin
  3448. Result := (ctx^.locHistoryTop >= 0);
  3449. end;
  3450. function GetHistoryLength() : Integer;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3451. begin
  3452. Result := (ctx^.locHistoryTop + 1);
  3453. end;
  3454. procedure GoBack();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3455. begin
  3456. Assert(ctx^.locHistoryTop >= 0);
  3457. ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
  3458. ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
  3459. ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
  3460. ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
  3461. ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
  3462. ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
  3463. ctx^.ps := @ctx^.s[ctx^.i];
  3464. Dec(ctx^.locHistoryTop);
  3465. end;
  3466. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  3467. var
  3468. k : DWord;
  3469. pk : PUnicodeChar;
  3470. puk : PUC_Prop;
  3471. begin
  3472. k := AStartFrom;
  3473. if (k > ctx^.c) then
  3474. exit(False);
  3475. if (ctx^.removedCharIndexLength>0) and
  3476. (IndexInArrayDWord(ctx^.removedCharIndex,k) >= 0)
  3477. then begin
  3478. exit(False);
  3479. end;
  3480. {if (k = (i+1)) or
  3481. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  3482. then
  3483. lastUnblockedNonstarterCCC := 0;}
  3484. pk := @ctx^.s[k];
  3485. if UnicodeIsHighSurrogate(pk^) then begin
  3486. if (k = ctx^.c) then
  3487. exit(False);
  3488. if UnicodeIsLowSurrogate(pk[1]) then
  3489. puk := GetProps(pk[0],pk[1])
  3490. else
  3491. puk := GetProps(Word(pk^));
  3492. end else begin
  3493. puk := GetProps(Word(pk^));
  3494. end;
  3495. if (puk^.CCC = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.CCC) then
  3496. exit(False);
  3497. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  3498. Result := True;
  3499. end;
  3500. procedure RemoveChar(APos : Integer);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3501. begin
  3502. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  3503. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  3504. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
  3505. Inc(ctx^.removedCharIndexLength);
  3506. if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
  3507. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  3508. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  3509. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
  3510. Inc(ctx^.removedCharIndexLength);
  3511. end;
  3512. end;
  3513. procedure Inc_I();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3514. begin
  3515. if (ctx^.removedCharIndexLength = 0) then begin
  3516. Inc(ctx^.i);
  3517. Inc(ctx^.ps);
  3518. exit;
  3519. end;
  3520. while True do begin
  3521. Inc(ctx^.i);
  3522. Inc(ctx^.ps);
  3523. if (IndexInArrayDWord(ctx^.removedCharIndex,ctx^.i) = -1) then
  3524. Break;
  3525. end;
  3526. end;
  3527. function MoveToNextChar() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3528. begin
  3529. Result := True;
  3530. if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
  3531. if (ctx^.i = ctx^.c) then
  3532. exit(False);
  3533. if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  3534. ctx^.surrogateState := True;
  3535. ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
  3536. end else begin
  3537. ctx^.surrogateState := False;
  3538. ctx^.cp := Word(ctx^.ps[0]);
  3539. end;
  3540. end else begin
  3541. ctx^.surrogateState := False;
  3542. ctx^.cp := Word(ctx^.ps[0]);
  3543. end;
  3544. end;
  3545. function FindPropUCA() : Boolean;
  3546. var
  3547. candidateCL : PUCA_DataBook;
  3548. begin
  3549. ctx^.pp := nil;
  3550. if (ctx^.cl = nil) then
  3551. candidateCL := ctx^.Collation
  3552. else
  3553. candidateCL := ctx^.cl;
  3554. if ctx^.surrogateState then begin
  3555. while (candidateCL <> nil) do begin
  3556. ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
  3557. if (ctx^.pp <> nil) then
  3558. break;
  3559. candidateCL := candidateCL^.Base;
  3560. end;
  3561. end else begin
  3562. while (candidateCL <> nil) do begin
  3563. ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
  3564. if (ctx^.pp <> nil) then
  3565. break;
  3566. candidateCL := candidateCL^.Base;
  3567. end;
  3568. end;
  3569. ctx^.cl := candidateCL;
  3570. Result := (ctx^.pp <> nil);
  3571. end;
  3572. procedure AddWeightsAndClear();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3573. var
  3574. ctxNode : PUCA_PropItemContextTreeNodeRec;
  3575. begin
  3576. if (ctx^.pp^.WeightLength > 0) then begin
  3577. AddWeights(ctx^.pp);
  3578. end else
  3579. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3580. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3581. (ctxNode^.Data.WeightCount > 0)
  3582. then begin
  3583. AddContextWeights(@ctxNode^.Data);
  3584. end;
  3585. //AddWeights(pp);
  3586. ClearHistory();
  3587. ClearPP(ctx);
  3588. end;
  3589. function StartMatch() : Boolean;
  3590. procedure HandleLastChar();
  3591. var
  3592. ctxNode : PUCA_PropItemContextTreeNodeRec;
  3593. begin
  3594. while True do begin
  3595. if ctx^.pp^.IsValid() then begin
  3596. if (ctx^.pp^.WeightLength > 0) then
  3597. AddWeights(ctx^.pp)
  3598. else
  3599. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3600. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3601. (ctxNode^.Data.WeightCount > 0)
  3602. then
  3603. AddContextWeights(@ctxNode^.Data)
  3604. else
  3605. AddComputedWeights(ctx^.cp){handle deletion of code point};
  3606. break;
  3607. end;
  3608. if (ctx^.cl^.Base = nil) then begin
  3609. AddComputedWeights(ctx^.cp);
  3610. break;
  3611. end;
  3612. ctx^.cl := ctx^.cl^.Base;
  3613. if not FindPropUCA() then begin
  3614. AddComputedWeights(ctx^.cp);
  3615. break;
  3616. end;
  3617. end;
  3618. end;
  3619. var
  3620. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  3621. begin
  3622. Result := False;
  3623. ctx^.ppLevel := 0;
  3624. if not FindPropUCA() then begin
  3625. AddComputedWeights(ctx^.cp);
  3626. ClearHistory();
  3627. ClearPP(ctx);
  3628. Result := True;
  3629. end else begin
  3630. if (ctx^.i = ctx^.c) then begin
  3631. HandleLastChar();
  3632. Result := True;
  3633. end else begin
  3634. if ctx^.pp^.IsValid()then begin
  3635. if (ctx^.pp^.ChildCount = 0) then begin
  3636. if (ctx^.pp^.WeightLength > 0) then
  3637. AddWeights(ctx^.pp)
  3638. else
  3639. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3640. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
  3641. (tmpCtxNode^.Data.WeightCount > 0)
  3642. then
  3643. AddContextWeights(@tmpCtxNode^.Data)
  3644. else
  3645. AddComputedWeights(ctx^.cp){handle deletion of code point};
  3646. ClearPP(ctx);
  3647. ClearHistory();
  3648. Result := True;
  3649. end else begin
  3650. RecordStep();
  3651. end
  3652. end else begin
  3653. if (ctx^.pp^.ChildCount = 0) then begin
  3654. AddComputedWeights(ctx^.cp);
  3655. ClearPP(ctx);
  3656. ClearHistory();
  3657. Result := True;
  3658. end else begin
  3659. RecordStep();
  3660. end;
  3661. end;
  3662. end;
  3663. end;
  3664. end;
  3665. function TryPermutation() : Boolean;
  3666. var
  3667. kk : Integer;
  3668. b : Boolean;
  3669. puk : PUC_Prop;
  3670. ppk : PUCA_PropItemRec;
  3671. begin
  3672. Result := False;
  3673. puk := GetProps(ctx^.cp);
  3674. if (puk^.CCC = 0) then
  3675. exit;
  3676. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  3677. if ctx^.surrogateState then
  3678. kk := ctx^.i + 2
  3679. else
  3680. kk := ctx^.i + 1;
  3681. while IsUnblockedNonstarter(kk) do begin
  3682. b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
  3683. if b then
  3684. ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
  3685. else
  3686. ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
  3687. if (ppk <> nil) then begin
  3688. ctx^.pp := ppk;
  3689. RemoveChar(kk);
  3690. Inc(ctx^.ppLevel);
  3691. RecordStep();
  3692. Result := True;
  3693. if (ctx^.pp^.ChildCount = 0 ) then
  3694. Break;
  3695. end;
  3696. if b then
  3697. Inc(kk);
  3698. Inc(kk);
  3699. end;
  3700. end;
  3701. procedure AdvanceCharPos();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
  3702. begin
  3703. if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  3704. Inc(ctx^.i);
  3705. Inc(ctx^.ps);
  3706. end;
  3707. Inc_I();
  3708. end;
  3709. var
  3710. ok : Boolean;
  3711. pp1 : PUCA_PropItemRec;
  3712. cltemp : PUCA_DataBook;
  3713. ctxNode : PUCA_PropItemContextTreeNodeRec;
  3714. begin
  3715. if AContext^.Finished then
  3716. exit(False);
  3717. ctx := AContext;
  3718. while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
  3719. ok := False;
  3720. if (ctx^.pp = nil) then begin // Start Matching
  3721. ok := StartMatch();
  3722. end else begin
  3723. pp1 := FindChild(ctx^.cp,ctx^.pp);
  3724. if (pp1 <> nil) then begin
  3725. Inc(ctx^.ppLevel);
  3726. ctx^.pp := pp1;
  3727. if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
  3728. ok := False;
  3729. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3730. if (ctx^.pp^.WeightLength > 0) then begin
  3731. AddWeightsAndClear();
  3732. ok := True;
  3733. end else
  3734. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3735. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3736. (ctxNode^.Data.WeightCount > 0)
  3737. then begin
  3738. AddContextWeights(@ctxNode^.Data);
  3739. ClearHistory();
  3740. ClearPP(ctx);
  3741. ok := True;
  3742. end
  3743. end;
  3744. if not ok then begin
  3745. RecordDeletion();
  3746. while HasHistory() do begin
  3747. GoBack();
  3748. if ctx^.pp^.IsValid() and
  3749. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3750. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3751. )
  3752. then begin
  3753. AddWeightsAndClear();
  3754. ok := True;
  3755. Break;
  3756. end;
  3757. end;
  3758. if not ok then begin
  3759. cltemp := ctx^.cl^.Base;
  3760. if (cltemp <> nil) then begin
  3761. ClearPP(ctx,False);
  3762. ctx^.cl := cltemp;
  3763. Continue;
  3764. end;
  3765. end;
  3766. if not ok then begin
  3767. AddComputedWeights(ctx^.cp);
  3768. ClearHistory();
  3769. ClearPP(ctx);
  3770. ok := True;
  3771. end;
  3772. end;
  3773. end else begin
  3774. RecordStep();
  3775. end;
  3776. end else begin
  3777. // permutations !
  3778. ok := False;
  3779. if TryPermutation() and ctx^.pp^.IsValid() then begin
  3780. if (ctx^.suppressState.CharCount = 0) then begin
  3781. AddWeightsAndClear();
  3782. //ok := True;
  3783. exit(True);// Continue;
  3784. end;
  3785. while True do begin
  3786. if ctx^.pp^.IsValid() and
  3787. (ctx^.pp^.WeightLength > 0) and
  3788. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3789. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3790. )
  3791. then begin
  3792. AddWeightsAndClear();
  3793. ok := True;
  3794. break;
  3795. end;
  3796. if not HasHistory() then
  3797. break;
  3798. GoBack();
  3799. if (ctx^.pp = nil) then
  3800. break;
  3801. end;
  3802. end;
  3803. if not ok then begin
  3804. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3805. if (ctx^.pp^.WeightLength > 0) then begin
  3806. AddWeightsAndClear();
  3807. ok := True;
  3808. end else
  3809. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3810. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3811. (ctxNode^.Data.WeightCount > 0)
  3812. then begin
  3813. AddContextWeights(@ctxNode^.Data);
  3814. ClearHistory();
  3815. ClearPP(ctx);
  3816. ok := True;
  3817. end
  3818. end;
  3819. if ok then
  3820. exit(True);// Continue;
  3821. end;
  3822. if not ok then begin
  3823. if (ctx^.cl^.Base <> nil) then begin
  3824. cltemp := ctx^.cl^.Base;
  3825. while HasHistory() do
  3826. GoBack();
  3827. ctx^.pp := nil;
  3828. ctx^.ppLevel := 0;
  3829. ctx^.cl := cltemp;
  3830. Continue;
  3831. end;
  3832. //walk back
  3833. ok := False;
  3834. while HasHistory() do begin
  3835. GoBack();
  3836. if ctx^.pp^.IsValid() and
  3837. (ctx^.pp^.WeightLength > 0) and
  3838. ( (ctx^.suppressState.CharCount = 0) or
  3839. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3840. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3841. )
  3842. )
  3843. then begin
  3844. AddWeightsAndClear();
  3845. ok := True;
  3846. Break;
  3847. end;
  3848. end;
  3849. if ok then begin
  3850. AdvanceCharPos();
  3851. exit(True);// Continue;
  3852. end;
  3853. if (ctx^.pp <> nil) then begin
  3854. AddComputedWeights(ctx^.cp);
  3855. ClearHistory();
  3856. ClearPP(ctx);
  3857. ok := True;
  3858. end;
  3859. end;
  3860. end;
  3861. end;
  3862. if ctx^.surrogateState then begin
  3863. Inc(ctx^.ps);
  3864. Inc(ctx^.i);
  3865. end;
  3866. //
  3867. Inc_I();
  3868. if ok then
  3869. exit(True);
  3870. end;
  3871. SetLength(ctx^.r,ctx^.ral);
  3872. ctx^.Finished := True;
  3873. Result := True;
  3874. end;
  3875. function ComputeSortKey(
  3876. const AStr : PUnicodeChar;
  3877. const ALength : SizeInt;
  3878. const ACollation : PUCA_DataBook
  3879. ) : TUCASortKey;
  3880. var
  3881. r : TUCA_PropWeightsArray;
  3882. begin
  3883. r := ComputeRawSortKey(AStr,ALength,ACollation);
  3884. Result := FormKey(r,ACollation);
  3885. end;
  3886. end.