12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199 |
- { Unicode tables unit.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
- The source code is distributed under the Library GNU
- General Public License with the following modification:
- - object files and libraries linked into an application may be
- distributed without source code.
- If you didn't receive a copy of the file COPYING, contact:
- Free Software Foundation
- 675 Mass Ave
- Cambridge, MA 02139
- USA
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- -------------------------------------------------------------------------------
- Overview of the Unicode Collation Algorithm(UCA) data layout :
- ============================================================
- The UCA data(see “TUCA_DataBook”) are organized into index data
- (see the “TUCA_DataBook” fields “BMP_Table1”, “BMP_Table2”,
- “OBMP_Table1” and “OBMP_Table2”) and actual properties data(see
- the “Props” field of “TUCA_DataBook”). The index is a 3 level
- tables designed to minimize the overhaul data size. The
- properties’ data contain the actual (used) UCA’s properties
- for the customized code points(or sequence of code points)
- data (see TUCA_PropItemRec).
- To get the properties’ record of a code point, one goes
- through the index data to get its offset into the “Props”
- serialized data, see the “GetPropUCA” procedure.
- The “TUCA_PropItemRec” record, that represents the actual
- properties, contains a fixed part and a variable part. The
- fixed part is directly expressed as fields of the record :
- “WeightLength”, “ChildCount”, “Size”, “Flags”. The
- variable part depends on some values of the fixed part; For
- example “WeightLength” specify the number of weight[1] item,
- it can be zero or not null; The “Flags” fields does contains
- some bit states to indicate for example if the record’s owner,
- that is the target code point, is present(it is not always
- necessary to store the code point as you are required to have
- this information in the first place in order to get the
- “TUCA_PropItemRec” record).
- The data, as it is organized now, is as follow for each code point :
- * the fixed part is serialized,
- * if there are weight item array, they are serialized
- (see the "WeigthLength")
- * the code point is serialized (if needed)
- * the context[2] array is serialized
- * The children[3] record are serialized.
- The “Size” represent the size of the whole record, including its
- children records(see [3]). The “GetSelfOnlySize” returns the size
- of the queried record, excluding the size of its children.
- Notes :
- [1] : A weight item is an array of 3 words. A code point/sequence of code
- point may have zero or multiple items.
- [2] : There are characters(mostly japanese ones) that do not have their
- own weighs; There inherit the weights of the preceding character
- in the string that you will be evaluating.
- [3] : Some unicode characters are expressed using more than one code point.
- In that case the properties records are serialized as a trie. The
- trie data structure is useful when many characters’ expression have
- the same starting code point(s).
- [4] TUCA_PropItemRec serialization :
- TUCA_PropItemRec :
- WeightLength, ChildCount, Size, Flags [weight item array]
- [Code Point] [Context data]
- [Child 0] [Child 1] .. [Child n]
- each [Child k] is a TUCA_PropItemRec.
- }
- unit unicodedata;
- {$IFDEF FPC}
- {$mode delphi}
- {$H+}
- {$PACKENUM 1}
- {$warn 4056 off} //Conversion between ordinals and pointers is not portable
- {$DEFINE HAS_PUSH}
- {$DEFINE HAS_COMPARE_BYTE}
- {$DEFINE INLINE_SUPPORT_PRIVATE_VARS}
- {$DEFINE HAS_UNALIGNED}
- {$ENDIF FPC}
- {$IFNDEF FPC}
- {$UNDEF HAS_COMPARE_BYTE}
- {$UNDEF HAS_PUSH}
- {$DEFINE ENDIAN_LITTLE}
- {$ENDIF !FPC}
- {$SCOPEDENUMS ON}
- {$pointermath on}
- {$define USE_INLINE}
- { $define uni_debug}
- interface
- {$IFNDEF FPC}
- type
- UnicodeChar = WideChar;
- PUnicodeChar = ^UnicodeChar;
- SizeInt = NativeInt;
- DWord = UInt32;
- PDWord = ^DWord;
- PtrInt = NativeInt;
- PtrUInt = NativeUInt;
- {$ENDIF !FPC}
- {$IF not Declared(reCodesetConversion)}
- const reCodesetConversion = reRangeError;
- {$IFEND reCodesetConversion}
- {$IF not Declared(DirectorySeparator)}
- {$IFDEF MSWINDOWS}
- const DirectorySeparator = '\';
- {$ELSE}
- const DirectorySeparator = '/';
- {$ENDIF MSWINDOWS}
- {$IFEND DirectorySeparator}
- const
- MAX_WORD = High(Word);
- LOW_SURROGATE_BEGIN = Word($DC00);
- LOW_SURROGATE_END = Word($DFFF);
- HIGH_SURROGATE_BEGIN = Word($D800);
- HIGH_SURROGATE_END = Word($DBFF);
- HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
- UCS4_HALF_BASE = LongWord($10000);
- UCS4_HALF_MASK = Word($3FF);
- MAX_LEGAL_UTF32 = $10FFFF;
- const
- // Unicode General Category
- UGC_UppercaseLetter = 0;
- UGC_LowercaseLetter = 1;
- UGC_TitlecaseLetter = 2;
- UGC_ModifierLetter = 3;
- UGC_OtherLetter = 4;
- UGC_NonSpacingMark = 5;
- UGC_CombiningMark = 6;
- UGC_EnclosingMark = 7;
- UGC_DecimalNumber = 8;
- UGC_LetterNumber = 9;
- UGC_OtherNumber = 10;
- UGC_ConnectPunctuation = 11;
- UGC_DashPunctuation = 12;
- UGC_OpenPunctuation = 13;
- UGC_ClosePunctuation = 14;
- UGC_InitialPunctuation = 15;
- UGC_FinalPunctuation = 16;
- UGC_OtherPunctuation = 17;
- UGC_MathSymbol = 18;
- UGC_CurrencySymbol = 19;
- UGC_ModifierSymbol = 20;
- UGC_OtherSymbol = 21;
- UGC_SpaceSeparator = 22;
- UGC_LineSeparator = 23;
- UGC_ParagraphSeparator = 24;
- UGC_Control = 25;
- UGC_Format = 26;
- UGC_Surrogate = 27;
- UGC_PrivateUse = 28;
- UGC_Unassigned = 29;
-
- // Names
- UnicodeCategoryNames: array[0..29] of string[2] = (
- 'Lu',
- 'Ll',
- 'Lt',
- 'Lm',
- 'Lo',
- 'Mn',
- 'Mc',
- 'Me',
- 'Nd',
- 'Nl',
- 'No',
- 'Pc',
- 'Pd',
- 'Ps',
- 'Pe',
- 'Pi',
- 'Pf',
- 'Po',
- 'Sm',
- 'Sc',
- 'Sk',
- 'So',
- 'Zs',
- 'Zl',
- 'Zp',
- 'Cc',
- 'Cf',
- 'Cs',
- 'Co',
- 'Cn'
- );
- type
- TUInt24Rec = packed record
- public
- {$ifdef ENDIAN_LITTLE}
- a, b, c : Byte;
- {$else ENDIAN_LITTLE}
- c, b, a : Byte;
- {$endif ENDIAN_LITTLE}
- public
- property byte0 : Byte read a write a;
- property byte1 : Byte read b write b;
- property byte2 : Byte read c write c;
- public
- class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- end;
- UInt24 = TUInt24Rec;
- PUInt24 = ^UInt24;
- const
- ZERO_UINT24 : UInt24 =
- {$ifdef ENDIAN_LITTLE}
- (a : 0; b : 0; c : 0;);
- {$else ENDIAN_LITTLE}
- (c : 0; b : 0; a : 0;);
- {$endif ENDIAN_LITTLE}
- type
- PUC_Prop = ^TUC_Prop;
- { TUC_Prop }
- { On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
- This breaks intended layout of initialized constants/variables.
- A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
- but for now just declare this record as "unpacked". This causes bloat, but it's better than having
- entire unit not working at all. }
- TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
- private
- function GetCategory : Byte;inline;
- procedure SetCategory(AValue : Byte);
- function GetWhiteSpace : Boolean;inline;
- procedure SetWhiteSpace(AValue : Boolean);
- function GetHangulSyllable : Boolean;inline;
- procedure SetHangulSyllable(AValue : Boolean);
- function GetNumericValue: Double;inline;
- function GetUnifiedIdeograph : Boolean;inline;
- public //Shortned names
- C : Byte; //CategoryData
- C3 : Byte; //Canonical Combining Class
- N : Byte; //NumericIndex
- UC : UInt24; //SimpleUpperCase
- LC : UInt24; //SimpleLowerCase
- D : SmallInt; //DecompositionID
- public
- property CategoryData : Byte read C write C;
- property NumericIndex : Byte read N write N;
- property SimpleUpperCase : UInt24 read UC write UC;
- property SimpleLowerCase : UInt24 read LC write LC;
- property DecompositionID : SmallInt read D write D;
- public
- property Category : Byte read GetCategory write SetCategory;
- property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
- property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
- property UnifiedIdeograph : Boolean read GetUnifiedIdeograph;
- property NumericValue : Double read GetNumericValue;
- end;
- type
- TUCA_PropWeights = packed record
- Weights : array[0..2] of Word;
- end;
- PUCA_PropWeights = ^TUCA_PropWeights;
- TUCA_PropItemContextRec = packed record
- public
- CodePointCount : Byte;
- WeightCount : Byte;
- public
- function GetCodePoints() : PUInt24;inline;
- function GetWeights() : PUCA_PropWeights;inline;
- end;
- PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
- PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
- TUCA_PropItemContextTreeNodeRec = packed record
- public
- Left : Word;
- Right : Word;
- Data : TUCA_PropItemContextRec;
- public
- function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
- function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
- end;
- { TUCA_PropItemContextTreeRec }
- TUCA_PropItemContextTreeRec = packed record
- public
- Size : UInt24;
- public
- function GetData:PUCA_PropItemContextTreeNodeRec;inline;
- property Data : PUCA_PropItemContextTreeNodeRec read GetData;
- function Find(
- const AChars : PUInt24;
- const ACharCount : Integer;
- out ANode : PUCA_PropItemContextTreeNodeRec
- ) : Boolean;
- end;
- PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
- { TUCA_PropItemRec }
- TUCA_PropItemRec = packed record
- private
- const FLAG_VALID = 0;
- const FLAG_CODEPOINT = 1;
- const FLAG_CONTEXTUAL = 2;
- const FLAG_DELETION = 3;
- const FLAG_COMPRESS_WEIGHT_1 = 6;
- const FLAG_COMPRESS_WEIGHT_2 = 7;
- private
- function GetCodePoint() : UInt24;inline;
- public
- WeightLength : Byte;
- ChildCount : Byte;
- Size : Word;
- Flags : Byte;
- public
- function HasCodePoint() : Boolean;inline;
- property CodePoint : UInt24 read GetCodePoint;
- //Weights : array[0..WeightLength] of TUCA_PropWeights;
- function IsValid() : Boolean;inline;
- //function GetWeightArray() : PUCA_PropWeights;inline;
- procedure GetWeightArray(ADest : PUCA_PropWeights);
- function GetSelfOnlySize() : Cardinal;inline;
- function GetContextual() : Boolean;inline;
- property Contextual : Boolean read GetContextual;
- function GetContext() : PUCA_PropItemContextTreeRec;
- function IsDeleted() : Boolean;inline;
- function IsWeightCompress_1() : Boolean;inline;
- function IsWeightCompress_2() : Boolean;inline;
- end;
- PUCA_PropItemRec = ^TUCA_PropItemRec;
- TUCA_VariableKind = (
- ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
- ucaIgnoreSP // This one is not implemented !
- );
- TCollationName = array[0..(128-1)] of Byte;
- TCollationVersion = TCollationName;
- PUCA_DataBook = ^TUCA_DataBook;
- TUCA_DataBook = record
- public
- Base : PUCA_DataBook;
- Version : TCollationVersion;
- CollationName : TCollationName;
- VariableWeight : TUCA_VariableKind;
- Backwards : array[0..3] of Boolean;
- BMP_Table1 : PByte;
- BMP_Table2 : PUInt24;
- OBMP_Table1 : PWord;
- OBMP_Table2 : PUInt24;
- PropCount : Integer;
- Props : PUCA_PropItemRec;
- VariableLowLimit : Word;
- VariableHighLimit : Word;
- NoNormalization : Boolean;
- ComparisonStrength : Byte;
- Dynamic : Boolean;
- public
- function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
- end;
- TUnicodeStringArray = array of UnicodeString;
- TCollationTableItem = record
- Collation : PUCA_DataBook;
- Aliases : TUnicodeStringArray;
- end;
- PCollationTableItem = ^TCollationTableItem;
- TCollationTableItemArray = array of TCollationTableItem;
- { TCollationTable }
- TCollationTable = record
- private
- FItems : TCollationTableItemArray;
- FCount : Integer;
- private
- function GetCapacity : Integer;
- function GetCount : Integer;
- function GetItem(const AIndex : Integer) : PCollationTableItem;
- procedure Grow();
- procedure ClearItem(AItem : PCollationTableItem);
- procedure AddAlias(
- AItem : PCollationTableItem;
- AAlias : UnicodeString
- );overload;
- public
- class function NormalizeName(AName : UnicodeString) : UnicodeString;static;
- procedure Clear();
- function IndexOf(AName : UnicodeString) : Integer;overload;
- function IndexOf(ACollation : PUCA_DataBook) : Integer;overload;
- function Find(AName : UnicodeString) : PCollationTableItem;overload;
- function Find(ACollation : PUCA_DataBook) : PCollationTableItem;overload;
- function Add(ACollation : PUCA_DataBook) : Integer;
- function AddAlias(AName, AAlias : UnicodeString) : Boolean;overload;
- function Remove(AIndex : Integer) : PUCA_DataBook;
- property Item[const AIndex : Integer] : PCollationTableItem read GetItem;default;
- property Count : Integer read GetCount;
- property Capacity : Integer read GetCapacity;
- end;
- TCollationField = (
- BackWard, VariableLowLimit, VariableHighLimit, Alternate, Normalization,
- Strength
- );
- TCollationFields = set of TCollationField;
- const
- ROOT_COLLATION_NAME = 'DUCET';
- ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
- procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
- function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
- function UnicodeIsSurrogatePair(
- const AHighSurrogate,
- ALowSurrogate : UnicodeChar
- ) : Boolean;inline;
- function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
- function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
- function UnicodeToUpper(
- const AString : UnicodeString;
- const AIgnoreInvalidSequence : Boolean;
- out AResultString : UnicodeString
- ) : Integer;
- function UnicodeToLower(
- const AString : UnicodeString;
- const AIgnoreInvalidSequence : Boolean;
- out AResultString : UnicodeString
- ) : Integer;
- function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
- function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
- function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
- procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
- procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
- type
- TUCASortKeyItem = Word;
- TUCASortKey = array of TUCASortKeyItem;
- TCategoryMask = set of 0..31;
- const
- DEFAULT_UCA_COMPARISON_STRENGTH = 3;
- function ComputeSortKey(
- const AString : UnicodeString;
- const ACollation : PUCA_DataBook
- ) : TUCASortKey;inline;overload;
- function ComputeSortKey(
- const AStr : PUnicodeChar;
- const ALength : SizeInt;
- const ACollation : PUCA_DataBook
- ) : TUCASortKey;overload;
- function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
- function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
- function IncrementalCompareString(
- const AStrA : PUnicodeChar;
- const ALengthA : SizeInt;
- const AStrB : PUnicodeChar;
- const ALengthB : SizeInt;
- const ACollation : PUCA_DataBook
- ) : Integer;overload;
- function IncrementalCompareString(
- const AStrA,
- AStrB : UnicodeString;
- const ACollation : PUCA_DataBook
- ) : Integer;inline;overload;
- function FilterString(
- const AStr : PUnicodeChar;
- const ALength : SizeInt;
- const AExcludedMask : TCategoryMask
- ) : UnicodeString;overload;
- function FilterString(
- const AStr : UnicodeString;
- const AExcludedMask : TCategoryMask
- ) : UnicodeString;overload;inline;
- function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
- function RegisterCollation(
- const ACollation : PUCA_DataBook;
- const AAliasList : array of UnicodeString
- ) : Boolean;overload;
- function RegisterCollation(
- ADirectory, ALanguage : UnicodeString
- ) : Boolean;overload;
- function AddAliasCollation(
- ACollation : PUCA_DataBook;
- AALias : UnicodeString
- ) : Boolean;
- function UnregisterCollation(AName : UnicodeString): Boolean;
- procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
- function FindCollation(AName : UnicodeString): PUCA_DataBook;overload;
- function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
- function GetCollationCount() : Integer;
- procedure PrepareCollation(
- ACollation : PUCA_DataBook;
- const ABaseName : UnicodeString;
- const AChangedFields : TCollationFields
- );
- function LoadCollation(
- const AData : Pointer;
- const ADataLength : Integer;
- var AAliases : TUnicodeStringArray
- ) : PUCA_DataBook;overload;
- function LoadCollation(
- const AData : Pointer;
- const ADataLength : Integer
- ) : PUCA_DataBook;overload;
- function LoadCollation(
- const AFileName : UnicodeString;
- var AAliases : TUnicodeStringArray
- ) : PUCA_DataBook;overload;
- function LoadCollation(
- const AFileName : UnicodeString
- ) : PUCA_DataBook;overload;
- function LoadCollation(
- const ADirectory,
- ALanguage : UnicodeString;
- var AAliases : TUnicodeStringArray
- ) : PUCA_DataBook;overload;
- function LoadCollation(
- const ADirectory,
- ALanguage : UnicodeString
- ) : PUCA_DataBook;overload;
- procedure FreeCollation(AItem : PUCA_DataBook);
- type
- TSetOfByte = set of Byte;
- function BytesToString(
- const ABytes : array of Byte;
- const AValideChars : TSetOfByte
- ) : UnicodeString;
- function BytesToName(
- const ABytes : array of Byte
- ) : UnicodeString;
- type
- TEndianKind = (Little, Big);
- const
- ENDIAN_SUFFIX : array[TEndianKind] of UnicodeString = ('le','be');
- {$IFDEF ENDIAN_LITTLE}
- ENDIAN_NATIVE = TEndianKind.Little;
- ENDIAN_NON_NATIVE = TEndianKind.Big;
- {$ENDIF ENDIAN_LITTLE}
- {$IFDEF ENDIAN_BIG}
- ENDIAN_NATIVE = TEndianKind.Big;
- ENDIAN_NON_NATIVE = TEndianKind.Little;
- {$ENDIF ENDIAN_BIG}
- resourcestring
- SCollationNotFound = 'Collation not found : "%s".';
- implementation
- type
- TCardinalRec = packed record
- {$ifdef ENDIAN_LITTLE}
- byte0, byte1, byte2, byte3 : Byte;
- {$else ENDIAN_LITTLE}
- byte3, byte2, byte1, byte0 : Byte;
- {$endif ENDIAN_LITTLE}
- end;
- TWordRec = packed record
- {$ifdef ENDIAN_LITTLE}
- byte0, byte1 : Byte;
- {$else ENDIAN_LITTLE}
- byte1, byte0 : Byte;
- {$endif ENDIAN_LITTLE}
- end;
- const
- BYTES_OF_VALID_NAME_CHARS : set of Byte = [
- Ord('a')..Ord('z'), Ord('A')..Ord('Z'), Ord('-'),Ord('_')
- ];
- function BytesToString(
- const ABytes : array of Byte;
- const AValideChars : TSetOfByte
- ) : UnicodeString;
- var
- c, i, rl : Integer;
- pr : PWord;
- begin
- rl := 0;
- c := Length(ABytes);
- if (c > 0) then begin
- for i := 0 to c-1 do begin
- if not(ABytes[i] in AValideChars) then
- break;
- rl := rl+1;
- end;
- end;
- SetLength(Result,rl);
- if (rl > 0) then begin
- pr := PWord(@Result[1]);
- for i := 0 to rl-1 do begin
- pr^ := ABytes[i];
- Inc(pr);
- end;
- end;
- end;
- function BytesToName(
- const ABytes : array of Byte
- ) : UnicodeString;
- begin
- Result := BytesToString(ABytes,BYTES_OF_VALID_NAME_CHARS);
- end;
- { TCollationTable }
- function TCollationTable.GetCapacity : Integer;
- begin
- Result := Length(FItems);
- end;
- function TCollationTable.GetCount : Integer;
- begin
- if (FCount < 0) or (Length(FItems) < 1) or (FCount > Length(FItems)) then
- FCount := 0;
- Result := FCount;
- end;
- function TCollationTable.GetItem(const AIndex : Integer) : PCollationTableItem;
- begin
- if (AIndex < 0) or (AIndex >= Count) then
- Error(reRangeError);
- Result := @FItems[AIndex];
- end;
- procedure TCollationTable.Grow();
- var
- c0, c1 : Integer;
- begin
- c0 := Length(FItems);
- if (c0 < 1) then begin
- c0 := 1;
- if (FCount < 0) then
- FCount := 0;
- end;
- c1 := 2*c0;
- c0 := Length(FItems);
- SetLength(FItems,c1);
- FillChar(FItems[c0],((c1-c0)*SizeOf(TCollationTableItem)),#0);
- end;
- procedure TCollationTable.ClearItem(AItem : PCollationTableItem);
- begin
- if (AItem = nil) then
- exit;
- AItem^.Collation := nil;
- SetLength(AItem^.Aliases,0);
- end;
- procedure TCollationTable.AddAlias(
- AItem : PCollationTableItem;
- AAlias : UnicodeString
- );
- var
- n : UnicodeString;
- c, i : Integer;
- begin
- n := NormalizeName(AAlias);
- if (n = '') then
- exit;
- c := Length(AItem^.Aliases);
- if (c > 0) then begin
- for i := 0 to c-1 do begin
- if (AItem^.Aliases[i] = n) then
- exit;
- end;
- end;
- SetLength(AItem^.Aliases,(c+1));
- AItem^.Aliases[c] := n;
- end;
- class function TCollationTable.NormalizeName(
- AName : UnicodeString
- ) : UnicodeString;
- var
- r : UnicodeString;
- c, i, rl : Integer;
- cx : Word;
- begin
- c := Length(AName);
- rl := 0;
- SetLength(r,c);
- for i := 1 to c do begin
- case Ord(AName[i]) of
- Ord('a')..Ord('z') : cx := Ord(AName[i]);
- Ord('A')..Ord('Z') : cx := Ord(AName[i])+(Ord('a')-Ord('A'));
- Ord('0')..Ord('9'),
- Ord('-'), Ord('_') : cx := Ord(AName[i]);
- else
- cx := 0;
- end;
- if (cx > 0) then begin
- rl := rl+1;
- r[rl] := UnicodeChar(cx);
- end;
- end;
- SetLength(r,rl);
- Result := r;
- end;
- procedure TCollationTable.Clear();
- var
- p : PCollationTableItem;
- i : Integer;
- begin
- if (Count < 1) then
- exit;
- p := @FItems[0];
- for i := 0 to Count-1 do begin;
- ClearItem(p);
- Inc(p);
- end;
- FCount := 0;
- end;
- function TCollationTable.IndexOf(AName : UnicodeString) : Integer;
- var
- c, i, k : Integer;
- p : PCollationTableItem;
- n : UnicodeString;
- begin
- c := Count;
- if (c > 0) then begin
- // Names
- n := NormalizeName(AName);
- p := @FItems[0];
- for i := 0 to c-1 do begin
- if (Length(p^.Aliases) > 0) and (p^.Aliases[0] = n) then
- exit(i);
- Inc(p);
- end;
- // Aliases
- p := @FItems[0];
- for i := 0 to c-1 do begin
- if (Length(p^.Aliases) > 1) then begin
- for k := 1 to Length(p^.Aliases)-1 do begin
- if (p^.Aliases[k] = n) then
- exit(i);
- end;
- end;
- Inc(p);
- end;
- end;
- Result := -1;
- end;
- function TCollationTable.IndexOf(ACollation : PUCA_DataBook) : Integer;
- var
- c, i : Integer;
- p : PCollationTableItem;
- begin
- c := Count;
- if (c > 0) then begin
- p := @FItems[0];
- for i := 0 to c-1 do begin
- if (p^.Collation = ACollation) then
- exit(i);
- Inc(p);
- end;
- end;
- Result := -1;
- end;
- function TCollationTable.Find(AName : UnicodeString) : PCollationTableItem;
- var
- i : Integer;
- begin
- i := IndexOf(AName);
- if (i >= 0) then
- Result := @FItems[i]
- else
- Result := nil;
- end;
- function TCollationTable.Find(ACollation : PUCA_DataBook) : PCollationTableItem;
- var
- i : Integer;
- begin
- i := IndexOf(ACollation);
- if (i >= 0) then
- Result := @FItems[i]
- else
- Result := nil;
- end;
- function TCollationTable.Add(ACollation : PUCA_DataBook) : Integer;
- var
- c : Integer;
- p : PCollationTableItem;
- begin
- Result := IndexOf(ACollation);
- if (Result < 0) then begin
- c := Count;
- if (c >= Capacity) then
- Grow();
- p := @FItems[c];
- p^.Collation := ACollation;
- SetLength(p^.Aliases,1);
- p^.Aliases[0] := NormalizeName(BytesToName(ACollation^.CollationName));
- FCount := FCount+1;
- Result := c;
- end;
- end;
- function TCollationTable.AddAlias(AName, AAlias : UnicodeString) : Boolean;
- var
- p : PCollationTableItem;
- begin
- p := Find(AName);
- Result := (p <> nil);
- if Result then
- AddAlias(p,AAlias);
- end;
- function TCollationTable.Remove(AIndex : Integer) : PUCA_DataBook;
- var
- p, q : PCollationTableItem;
- c, i : Integer;
- begin
- if (AIndex < 0) or (AIndex >= Count) then
- Error(reRangeError);
- p := @FItems[AIndex];
- Result := p^.Collation;
- ClearItem(p);
- c := Count;
- if (AIndex < (c-1)) then begin
- for i := AIndex+1 to c-1 do begin
- q := p;
- Inc(p);
- Move(p^,q^,SizeOf(TCollationTableItem));
- end;
- FillChar(p^,SizeOf(TCollationTableItem),#0);
- end;
- FCount := FCount-1;
- end;
- { TUInt24Rec }
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
- begin
- TCardinalRec(Result).byte0 := a.byte0;
- TCardinalRec(Result).byte1 := a.byte1;
- TCardinalRec(Result).byte2 := a.byte2;
- TCardinalRec(Result).byte3 := 0;
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
- begin
- Result := Cardinal(a);
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
- begin
- {$IFOPT R+}
- if (a > $FFFF) then
- Error(reIntOverflow);
- {$ENDIF R+}
- TWordRec(Result).byte0 := a.byte0;
- TWordRec(Result).byte1 := a.byte1;
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
- begin
- {$IFOPT R+}
- if (a > $FF) then
- Error(reIntOverflow);
- {$ENDIF R+}
- Result := a.byte0;
- end;
- class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
- begin
- {$IFOPT R+}
- if (a > $FFFFFF) then
- Error(reIntOverflow);
- {$ENDIF R+}
- Result.byte0 := TCardinalRec(a).byte0;
- Result.byte1 := TCardinalRec(a).byte1;
- Result.byte2 := TCardinalRec(a).byte2;
- end;
- class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
- begin
- Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
- begin
- Result := (TCardinalRec(b).byte3 = 0) and
- (a.byte0 = TCardinalRec(b).byte0) and
- (a.byte1 = TCardinalRec(b).byte1) and
- (a.byte2 = TCardinalRec(b).byte2);
- end;
- class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
- begin
- Result := (LongInt(a) = b);
- end;
- class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
- begin
- Result := (a.byte2 = 0) and
- (a.byte0 = TWordRec(b).byte0) and
- (a.byte1 = TWordRec(b).byte1);
- end;
- class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
- begin
- Result := (a.byte2 = 0) and
- (a.byte1 = 0) and
- (a.byte0 = b);
- end;
- class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
- begin
- Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
- end;
- class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
- begin
- Result := (TCardinalRec(b).byte3 <> 0) or
- (a.byte0 <> TCardinalRec(b).byte0) or
- (a.byte1 <> TCardinalRec(b).byte1) or
- (a.byte2 <> TCardinalRec(b).byte2);
- end;
- class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
- begin
- Result := (b <> a);
- end;
- class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
- begin
- Result := (a.byte2 > b.byte2) or
- ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
- ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
- end;
- class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) > b;
- end;
- class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a > Cardinal(b);
- end;
- class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
- begin
- Result := (a.byte2 > b.byte2) or
- ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
- ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
- end;
- class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) >= b;
- end;
- class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a >= Cardinal(b);
- end;
- class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
- begin
- Result := (b > a);
- end;
- class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) < b;
- end;
- class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a < Cardinal(b);
- end;
- class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
- begin
- Result := (b >= a);
- end;
- class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) <= b;
- end;
- class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a <= Cardinal(b);
- end;
- type
- TBitOrder = 0..7;
- function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
- begin
- Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
- end;
- procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
- begin
- if AValue then
- AData := AData or (1 shl (ABit mod 8))
- else
- AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
- end;
- {$IFNDEF HAS_COMPARE_BYTE}
- function CompareByte(const A, B; ALength : SizeInt):SizeInt;
- var
- pa, pb : PByte;
- i : Integer;
- begin
- if (ALength < 1) then
- exit(0);
- pa := PByte(@A);
- pb := PByte(@B);
- if (pa = pb) then
- exit(0);
- for i := 1 to ALength do begin
- if (pa^ <> pb^) then
- exit(i);
- pa := pa+1;
- pb := pb+1;
- end;
- Result := 0;
- end;
- {$ENDIF HAS_COMPARE_BYTE}
- function IndexInArrayDWord(const ABuffer : array of DWord; AItem : DWord) : SizeInt;
- var
- c, i : Integer;
- p : PDWord;
- begin
- Result := -1;
- c := Length(ABuffer);
- if (c < 1) then
- exit;
- p := @ABuffer[Low(ABuffer)];
- for i := 1 to c do begin
- if (p^ = AItem) then begin
- Result := i-1;
- break;
- end;
- p := p+1;
- end;
- end;
- var
- CollationTable : TCollationTable;
- function IndexOfCollation(AName : UnicodeString) : Integer;
- begin
- Result := CollationTable.IndexOf(AName);
- end;
- function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
- begin
- Result := RegisterCollation(ACollation,[]);
- end;
- function RegisterCollation(
- const ACollation : PUCA_DataBook;
- const AAliasList : array of UnicodeString
- ) : Boolean;
- var
- i : Integer;
- p : PCollationTableItem;
- begin
- Result := (CollationTable.IndexOf(BytesToName(ACollation^.CollationName)) = -1);
- if Result then begin
- i := CollationTable.Add(ACollation);
- if (Length(AAliasList) > 0) then begin
- p := CollationTable[i];
- for i := Low(AAliasList) to High(AAliasList) do
- CollationTable.AddAlias(p,AAliasList[i]);
- end;
- end;
- end;
- function RegisterCollation(ADirectory, ALanguage : UnicodeString) : Boolean;
- var
- cl : PUCA_DataBook;
- al : TUnicodeStringArray;
- begin
- al := nil;
- cl := LoadCollation(ADirectory,ALanguage,al);
- if (cl = nil) then
- exit(False);
- try
- Result := RegisterCollation(cl,al);
- except
- FreeCollation(cl);
- raise;
- end;
- if not Result then
- FreeCollation(cl);
- end;
- function AddAliasCollation(
- ACollation : PUCA_DataBook;
- AALias : UnicodeString
- ) : Boolean;
- var
- p : PCollationTableItem;
- begin
- Result := False;
- if (ACollation <> nil) then begin
- p := CollationTable.Find(ACollation);
- if (p <> nil) then begin
- CollationTable.AddAlias(p,AALias);
- Result := True;
- end;
- end;
- end;
- function UnregisterCollation(AName : UnicodeString): Boolean;
- var
- i : Integer;
- begin
- i := CollationTable.IndexOf(AName);
- Result := (i >= 0);
- if Result then
- CollationTable.Remove(i);
- end;
- procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
- var
- i : Integer;
- p : PCollationTableItem;
- begin
- if AFreeDynamicCollations then begin
- for i := 0 to CollationTable.Count-1 do begin
- p := CollationTable[i];
- if p^.Collation.Dynamic then begin
- FreeCollation(p^.Collation);
- p^.Collation := nil;
- end;
- end;
- end;
- CollationTable.Clear();
- end;
- function FindCollation(AName : UnicodeString): PUCA_DataBook;overload;
- var
- p : PCollationTableItem;
- begin
- p := CollationTable.Find(AName);
- if (p <> nil) then
- Result := p^.Collation
- else
- Result := nil;
- end;
- function GetCollationCount() : Integer;
- begin
- Result := CollationTable.Count;
- end;
- function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
- var
- p : PCollationTableItem;
- begin
- p := CollationTable[AIndex];
- if (p <> nil) then
- Result := p^.Collation
- else
- Result := nil;
- end;
- procedure PrepareCollation(
- ACollation : PUCA_DataBook;
- const ABaseName : UnicodeString;
- const AChangedFields : TCollationFields
- );
- var
- s : UnicodeString;
- p, base : PUCA_DataBook;
- begin
- if (ABaseName <> '') then
- s := ABaseName
- else
- s := ROOT_COLLATION_NAME;
- p := ACollation;
- base := FindCollation(s);
- if (base = nil) then
- Error(reCodesetConversion);
- p^.Base := base;
- if not(TCollationField.BackWard in AChangedFields) then
- p^.Backwards := base^.Backwards;
- if not(TCollationField.VariableLowLimit in AChangedFields) then
- p^.VariableLowLimit := base^.VariableLowLimit;
- if not(TCollationField.VariableHighLimit in AChangedFields) then
- p^.VariableLowLimit := base^.VariableHighLimit;
- if not(TCollationField.Alternate in AChangedFields) then
- p^.VariableWeight := base^.VariableWeight;
- if not(TCollationField.Normalization in AChangedFields) then
- p^.NoNormalization := base^.NoNormalization;
- if not(TCollationField.Strength in AChangedFields) then
- p^.ComparisonStrength := base^.ComparisonStrength;
- end;
- type
- TSerializedCollationHeader = packed record
- Base : TCollationName;
- Version : TCollationVersion;
- CollationName : TCollationName;
- CollationAliases : TCollationName; // ";" separated
- VariableWeight : Byte;
- Backwards : Byte;
- BMP_Table1Length : DWord;
- BMP_Table2Length : DWord;
- OBMP_Table1Length : DWord;
- OBMP_Table2Length : DWord;
- PropCount : DWord;
- VariableLowLimit : Word;
- VariableHighLimit : Word;
- NoNormalization : Byte;
- Strength : Byte;
- ChangedFields : Byte;
- end;
- PSerializedCollationHeader = ^TSerializedCollationHeader;
- procedure FreeCollation(AItem : PUCA_DataBook);
- var
- h : PSerializedCollationHeader;
- begin
- if (AItem = nil) or not(AItem^.Dynamic) then
- exit;
- h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
- if (AItem^.BMP_Table1 <> nil) then
- FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
- if (AItem^.BMP_Table2 <> nil) then
- FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
- if (AItem^.OBMP_Table1 <> nil) then
- FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
- if (AItem^.OBMP_Table2 <> nil) then
- FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
- if (AItem^.Props <> nil) then
- FreeMem(AItem^.Props,h^.PropCount);
- FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
- end;
- function ParseAliases(AStr : UnicodeString) : TUnicodeStringArray;
- var
- r : TUnicodeStringArray;
- c, k, i : Integer;
- s : UnicodeString;
- begin
- SetLength(r,0);
- c := Length(AStr);
- k := 1;
- for i := 1 to c do begin
- if (AStr[i] <> ';') then begin
- k := i;
- break;
- end;
- end;
- s := '';
- for i := 1 to c do begin
- if (AStr[i] = ';') then begin
- s := Copy(AStr,k,(i-k));
- end else if (i = c) then begin
- s := Copy(AStr,k,(i+1-k));
- end;
- if (s <> '') then begin
- SetLength(r,(Length(r)+1));
- r[High(r)] := s;
- s := '';
- k := i+1;
- end;
- end;
- Result := r;
- end;
- function LoadCollation(
- const AData : Pointer;
- const ADataLength : Integer;
- var AAliases : TUnicodeStringArray
- ) : PUCA_DataBook;
- var
- dataPointer : PByte;
- readedLength : LongInt;
- function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
- begin
- Result := (readedLength + ALength) <= ADataLength;
- if not result then
- exit;
- Move(dataPointer^,ADest^,ALength);
- Inc(dataPointer,ALength);
- readedLength := readedLength + ALength;
- end;
- var
- r : PUCA_DataBook;
- h : PSerializedCollationHeader;
- cfs : TCollationFields;
- i : Integer;
- baseName, s : UnicodeString;
- begin
- Result := nil;
- readedLength := 0;
- AAliases := nil;
- dataPointer := AData;
- r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
- try
- h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
- if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
- exit;
- r^.Version := h^.Version;
- r^.CollationName := h^.CollationName;
- r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
- r^.Backwards[0] := IsBitON(h^.Backwards,0);
- r^.Backwards[1] := IsBitON(h^.Backwards,1);
- r^.Backwards[2] := IsBitON(h^.Backwards,2);
- r^.Backwards[3] := IsBitON(h^.Backwards,3);
- if (h^.BMP_Table1Length > 0) then begin
- r^.BMP_Table1 := GetMemory(h^.BMP_Table1Length);
- if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
- exit;
- end;
- if (h^.BMP_Table2Length > 0) then begin
- r^.BMP_Table2 := GetMemory(h^.BMP_Table2Length);
- if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
- exit;
- end;
- if (h^.OBMP_Table1Length > 0) then begin
- r^.OBMP_Table1 := GetMemory(h^.OBMP_Table1Length);
- if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
- exit;
- end;
- if (h^.OBMP_Table2Length > 0) then begin
- r^.OBMP_Table2 := GetMemory(h^.OBMP_Table2Length);
- if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
- exit;
- end;
- r^.PropCount := h^.PropCount;
- if (h^.PropCount > 0) then begin
- r^.Props := GetMemory(h^.PropCount);
- if not ReadBuffer(r^.Props,h^.PropCount) then
- exit;
- end;
- r^.VariableLowLimit := h^.VariableLowLimit;
- r^.VariableHighLimit := h^.VariableHighLimit;
- r^.NoNormalization := (h^.NoNormalization <> 0);
- r^.ComparisonStrength := h^.Strength;
- cfs := [];
- for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
- if IsBitON(h^.ChangedFields,i) then
- cfs := cfs + [TCollationField(i)];
- end;
- baseName := BytesToName(h^.Base);
- if (baseName = '') then begin
- if (BytesToName(h^.CollationName) <> ROOT_COLLATION_NAME) then
- baseName := ROOT_COLLATION_NAME
- else
- baseName := '';
- end;
- if (baseName <> '') then
- PrepareCollation(r,baseName,cfs);
- s := BytesToString(h^.CollationAliases,(BYTES_OF_VALID_NAME_CHARS+[Ord(';')]));
- if (s <> '') then
- AAliases := ParseAliases(s);
- r^.Dynamic := True;
- Result := r;
- except
- FreeCollation(r);
- raise;
- end;
- end;
- function LoadCollation(
- const AData : Pointer;
- const ADataLength : Integer
- ) : PUCA_DataBook;
- var
- al : TUnicodeStringArray;
- begin
- al := nil;
- Result := LoadCollation(AData,ADataLength,al);
- end;
- {$IFDEF HAS_PUSH}
- {$PUSH}
- {$ENDIF HAS_PUSH}
- {$IFNDEF HAS_PUSH}
- {$IFOPT I+}
- {$DEFINE I_PLUS}
- {$ELSE}
- {$UNDEF I_PLUS}
- {$ENDIF}
- {$ENDIF HAS_PUSH}
- function LoadCollation(
- const AFileName : UnicodeString;
- var AAliases : TUnicodeStringArray
- ) : PUCA_DataBook;
- const
- BLOCK_SIZE = 16*1024;
- var
- f : File of Byte;
- locSize, locReaded, c : LongInt;
- locBuffer : PByte;
- locBlockSize : LongInt;
- begin
- Result := nil;
- {$I-}
- if (AFileName = '') then
- exit;
- Assign(f,AFileName);
- Reset(f);
- try
- if (IOResult <> 0) then
- exit;
- locSize := FileSize(f);
- if (locSize < SizeOf(TSerializedCollationHeader)) then
- exit;
- locBuffer := GetMemory(locSize);
- try
- locBlockSize := BLOCK_SIZE;
- locReaded := 0;
- while (locReaded < locSize) do begin
- if (locBlockSize > (locSize-locReaded)) then
- locBlockSize := locSize-locReaded;
- BlockRead(f,locBuffer[locReaded],locBlockSize,c);
- if (IOResult <> 0) or (c <= 0) then
- exit;
- locReaded := locReaded + c;
- end;
- Result := LoadCollation(locBuffer,locSize,AAliases);
- finally
- FreeMemory(locBuffer);
- end;
- finally
- Close(f);
- end;
- end;
- function LoadCollation(
- const AFileName : UnicodeString
- ) : PUCA_DataBook;
- var
- al : TUnicodeStringArray;
- begin
- al := nil;
- Result := LoadCollation(AFileName,al);
- end;
- {$IFDEF HAS_PUSH}
- {$POP}
- {$ELSE}
- {$IFDEF I_PLUS}
- {$I+}
- {$ELSE}
- {$I-}
- {$ENDIF}
- {$ENDIF HAS_PUSH}
- function LoadCollation(
- const ADirectory,
- ALanguage : UnicodeString;
- var AAliases : TUnicodeStringArray
- ) : PUCA_DataBook;
- var
- fileName : UnicodeString;
- begin
- fileName := ADirectory;
- if (fileName <> '') then begin
- if (fileName[Length(fileName)] <> DirectorySeparator) then
- fileName := fileName + DirectorySeparator;
- end;
- fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
- Result := LoadCollation(fileName,AAliases);
- end;
- function LoadCollation(
- const ADirectory,
- ALanguage : UnicodeString
- ) : PUCA_DataBook;
- var
- al : TUnicodeStringArray;
- begin
- al := nil;
- Result := LoadCollation(ADirectory,ALanguage,al);
- end;
- {$INCLUDE unicodedata.inc}
- {$IFDEF ENDIAN_LITTLE}
- {$INCLUDE unicodedata_le.inc}
- {$ENDIF ENDIAN_LITTLE}
- {$IFDEF ENDIAN_BIG}
- {$INCLUDE unicodedata_be.inc}
- {$ENDIF ENDIAN_BIG}
- procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
- begin
- AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
- ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
- end;
- function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
- begin
- Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
- (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
- end;
- function UnicodeIsSurrogatePair(
- const AHighSurrogate,
- ALowSurrogate : UnicodeChar
- ) : Boolean;
- begin
- Result :=
- ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
- (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
- ) and
- ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
- (Word(ALowSurrogate) <= LOW_SURROGATE_END)
- )
- end;
- function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
- begin
- Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
- (Word(AValue) <= HIGH_SURROGATE_END);
- end;
- function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
- begin
- Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
- (Word(AValue) <= LOW_SURROGATE_END);
- end;
- function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result:=
- @UC_PROP_ARRAY[
- UC_TABLE_3[
- UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
- [lo(ACodePoint) shr 4]
- ][lo(ACodePoint) and $F]
- ]; {
- @UC_PROP_ARRAY[
- UC_TABLE_2[
- (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
- WordRec(ACodePoint).Lo
- ]
- ];}
- end;
- function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result:=
- @UC_PROP_ARRAY[
- UCO_TABLE_3[
- UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
- [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
- ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
- ]; {
- Result:=
- @UC_PROP_ARRAY[
- UCO_TABLE_2[
- (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
- Word(ALowS) - LOW_SURROGATE_BEGIN
- ]
- ]; }
- end;
- function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
- var
- l, h : UnicodeChar;
- begin
- if (ACodePoint <= High(Word)) then
- exit(GetProps(Word(ACodePoint)));
- FromUCS4(ACodePoint,h,l);
- Result := GetProps(h,l);
- end;
- function UnicodeToUpper(
- const AString : UnicodeString;
- const AIgnoreInvalidSequence : Boolean;
- out AResultString : UnicodeString
- ) : Integer;
- var
- i, c : SizeInt;
- pp, pr : PUnicodeChar;
- pu : PUC_Prop;
- locIsSurrogate : Boolean;
- r : UnicodeString;
- begin
- c := Length(AString);
- SetLength(r,2*c);
- if (c > 0) then begin
- pp := @AString[1];
- pr := @r[1];
- i := 1;
- while (i <= c) do begin
- pu := GetProps(Word(pp^));
- locIsSurrogate := (pu^.Category = UGC_Surrogate);
- if locIsSurrogate then begin
- if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
- if AIgnoreInvalidSequence then begin
- pr^ := pp^;
- Inc(pp);
- Inc(pr);
- Inc(i);
- Continue;
- end;
- exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
- end;
- pu := GetProps(pp^,AString[i+1]);
- end;
- if (pu^.SimpleUpperCase = 0) then begin
- pr^ := pp^;
- if locIsSurrogate then begin
- Inc(pp);
- Inc(pr);
- Inc(i);
- pr^ := pp^;
- end;
- end else begin
- if (pu^.SimpleUpperCase <= $FFFF) then begin
- pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
- end else begin
- FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
- Inc(pr);
- end;
- if locIsSurrogate then begin
- Inc(pp);
- Inc(i);
- end;
- end;
- Inc(pp);
- Inc(pr);
- Inc(i);
- end;
- Dec(pp);
- i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
- SetLength(r,i);
- AResultString := r;
- end;
- Result := 0;
- end;
- function UnicodeToLower(
- const AString : UnicodeString;
- const AIgnoreInvalidSequence : Boolean;
- out AResultString : UnicodeString
- ) : Integer;
- var
- i, c : SizeInt;
- pp, pr : PUnicodeChar;
- pu : PUC_Prop;
- locIsSurrogate : Boolean;
- r : UnicodeString;
- begin
- c := Length(AString);
- SetLength(r,2*c);
- if (c > 0) then begin
- pp := @AString[1];
- pr := @r[1];
- i := 1;
- while (i <= c) do begin
- pu := GetProps(Word(pp^));
- locIsSurrogate := (pu^.Category = UGC_Surrogate);
- if locIsSurrogate then begin
- if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
- if AIgnoreInvalidSequence then begin
- pr^ := pp^;
- Inc(pp);
- Inc(pr);
- Inc(i);
- Continue;
- end;
- exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
- end;
- pu := GetProps(pp^,AString[i+1]);
- end;
- if (pu^.SimpleLowerCase = 0) then begin
- pr^ := pp^;
- if locIsSurrogate then begin
- Inc(pp);
- Inc(pr);
- Inc(i);
- pr^ := pp^;
- end;
- end else begin
- if (pu^.SimpleLowerCase <= $FFFF) then begin
- pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
- end else begin
- FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
- Inc(pr);
- end;
- if locIsSurrogate then begin
- Inc(pp);
- Inc(i);
- end;
- end;
- Inc(pp);
- Inc(pr);
- Inc(i);
- end;
- Dec(pp);
- i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
- SetLength(r,i);
- AResultString := r;
- end;
- Result := 0;
- end;
- //----------------------------------------------------------------------
- function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
- const
- SBase = $AC00;
- LBase = $1100;
- VBase = $1161;
- TBase = $11A7;
- LCount = 19;
- VCount = 21;
- TCount = 28;
- NCount = VCount * TCount; // 588
- SCount = LCount * NCount; // 11172
- var
- SIndex, L, V, T : Integer;
- begin
- SIndex := AChar - SBase;
- if (SIndex < 0) or (SIndex >= SCount) then begin
- ABuffer^ := AChar;
- exit(1);
- end;
- L := LBase + SIndex div NCount;
- V := VBase + (SIndex mod NCount) div TCount;
- T := TBase + SIndex mod TCount;
- ABuffer[0] := L;
- ABuffer[1] := V;
- Result := 2;
- if (T <> TBase) then begin
- ABuffer[2] := T;
- Inc(Result);
- end;
- end;
- function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
- var
- locStack : array[0..23] of Cardinal;
- locStackIdx : Integer;
- ResultBuffer : array[0..23] of Cardinal;
- ResultIdx : Integer;
- procedure AddCompositionToStack(const AIndex : Integer);
- var
- pdecIdx : ^TDecompositionIndexRec;
- k, kc : Integer;
- pu : ^UInt24;
- begin
- pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
- pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.S]);
- kc := pdecIdx^.L;
- Inc(pu,kc);
- for k := 1 to kc do begin
- Dec(pu);
- locStack[locStackIdx + k] := pu^;
- end;
- locStackIdx := locStackIdx + kc;
- end;
- procedure AddResult(const AChar : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Inc(ResultIdx);
- ResultBuffer[ResultIdx] := AChar;
- end;
- function PopStack() : Cardinal;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := locStack[locStackIdx];
- Dec(locStackIdx);
- end;
- var
- cu : Cardinal;
- decIdx : SmallInt;
- locIsWord : Boolean;
- i : Integer;
- p : PUnicodeChar;
- begin
- ResultIdx := -1;
- locStackIdx := -1;
- AddCompositionToStack(ADecomposeIndex);
- while (locStackIdx >= 0) do begin
- cu := PopStack();
- locIsWord := (cu <= MAX_WORD);
- if locIsWord then
- decIdx := GetProps(Word(cu))^.DecompositionID
- else
- decIdx := GetProps(cu)^.DecompositionID;
- if (decIdx = -1) then
- AddResult(cu)
- else
- AddCompositionToStack(decIdx);
- end;
- p := ABuffer;
- Result := 0;
- for i := 0 to ResultIdx do begin
- cu := ResultBuffer[i];
- if (cu <= MAX_WORD) then begin
- p[0] := UnicodeChar(Word(cu));
- Inc(p);
- end else begin
- FromUCS4(cu,p[0],p[1]);
- Inc(p,2);
- Inc(Result);
- end;
- end;
- Result := Result + ResultIdx + 1;
- end;
- procedure CanonicalOrder(var AString : UnicodeString);
- begin
- CanonicalOrder(@AString[1],Length(AString));
- end;
- procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
- var
- i, c : SizeInt;
- p, q : PUnicodeChar;
- locIsSurrogateP, locIsSurrogateQ : Boolean;
- procedure Swap();
- var
- t, t1 : UnicodeChar;
- begin
- if not locIsSurrogateP then begin
- if not locIsSurrogateQ then begin
- t := p^;
- p^ := q^;
- q^ := t;
- exit;
- end;
- t := p^;
- p[0] := q[0];
- p[1] := q[1];
- q[1] := t;
- exit;
- end;
- if not locIsSurrogateQ then begin
- t := q[0];
- p[2] := p[1];
- p[1] := p[0];
- p[0] := t;
- exit;
- end;
- t := p[0];
- t1 := p[1];
- p[0] := q[0];
- p[1] := q[1];
- q[0] := t;
- q[1] := t1;
- end;
- var
- pu : PUC_Prop;
- cccp, cccq : Byte;
- begin
- c := ALength;
- if (c < 2) then
- exit;
- p := AStr;
- i := 1;
- while (i < c) do begin
- pu := GetProps(Word(p^));
- locIsSurrogateP := (pu^.Category = UGC_Surrogate);
- if locIsSurrogateP then begin
- if (i = (c - 1)) then
- Break;
- if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
- Inc(p);
- Inc(i);
- Continue;
- end;
- pu := GetProps(p[0],p[1]);
- end;
- if (pu^.C3 > 0) then begin
- cccp := pu^.C3;
- if locIsSurrogateP then
- q := p + 2
- else
- q := p + 1;
- pu := GetProps(Word(q^));
- locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
- if locIsSurrogateQ then begin
- if (i = c) then
- Break;
- if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
- Inc(p);
- Inc(i);
- Continue;
- end;
- pu := GetProps(q[0],q[1]);
- end;
- cccq := pu^.C3;
- if (cccq > 0) and (cccp > cccq) then begin
- Swap();
- if (i > 1) then begin
- Dec(p);
- Dec(i);
- pu := GetProps(Word(p^));
- if (pu^.Category = UGC_Surrogate) then begin
- if (i > 1) then begin
- Dec(p);
- Dec(i);
- end;
- end;
- Continue;
- end;
- end;
- end;
- if locIsSurrogateP then begin
- Inc(p);
- Inc(i);
- end;
- Inc(p);
- Inc(i);
- end;
- end;
- //Canonical Decomposition
- function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
- begin
- Result := NormalizeNFD(@AString[1],Length(AString));
- end;
- function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
- const MAX_EXPAND = 3;
- var
- i, c, kc, k : SizeInt;
- pp, pr : PUnicodeChar;
- pu : PUC_Prop;
- locIsSurrogate : Boolean;
- cpArray : array[0..7] of Cardinal;
- cp : Cardinal;
- begin
- c := ALength;
- SetLength(Result,(MAX_EXPAND*c));
- if (c > 0) then begin
- pp := AStr;
- pr := @Result[1];
- i := 1;
- while (i <= c) do begin
- pu := GetProps(Word(pp^));
- locIsSurrogate := (pu^.Category = UGC_Surrogate);
- if locIsSurrogate then begin
- if (i = c) then
- Break;
- if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
- pr^ := pp^;
- Inc(pp);
- Inc(pr);
- Inc(i);
- Continue;
- end;
- pu := GetProps(pp[0],pp[1]);
- end;
- if pu^.HangulSyllable then begin
- if locIsSurrogate then begin
- cp := ToUCS4(pp[0],pp[1]);
- Inc(pp);
- Inc(i);
- end else begin
- cp := Word(pp^);
- end;
- kc := DecomposeHangul(cp,@cpArray[0]);
- for k := 0 to kc - 1 do begin
- if (cpArray[k] <= MAX_WORD) then begin
- pr^ := UnicodeChar(Word(cpArray[k]));
- pr := pr + 1;
- end else begin
- FromUCS4(cpArray[k],pr[0],pr[1]);
- pr := pr + 2;
- end;
- end;
- if (kc > 0) then
- Dec(pr);
- end else begin
- if (pu^.DecompositionID = -1) then begin
- pr^ := pp^;
- if locIsSurrogate then begin
- Inc(pp);
- Inc(pr);
- Inc(i);
- pr^ := pp^;
- end;
- end else begin
- k := Decompose(pu^.DecompositionID,pr);
- pr := pr + (k - 1);
- if locIsSurrogate then begin
- Inc(pp);
- Inc(i);
- end;
- end;
- end;
- Inc(pp);
- Inc(pr);
- Inc(i);
- end;
- Dec(pp);
- i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
- SetLength(Result,i);
- CanonicalOrder(@Result[1],Length(Result));
- end;
- end;
- { TUCA_PropItemContextTreeNodeRec }
- function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
- begin
- if (Self.Left = 0) then
- Result := nil
- else
- Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
- end;
- function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
- begin
- if (Self.Right = 0) then
- Result := nil
- else
- Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
- end;
- { TUCA_PropItemContextRec }
- function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
- begin
- Result := PUInt24(
- PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
- SizeOf(Self.WeightCount)
- );
- end;
- function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
- begin
- Result := PUCA_PropWeights(
- PtrUInt(@Self) +
- SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
- (Self.CodePointCount*SizeOf(UInt24))
- );
- end;
- { TUCA_PropItemContextTreeRec }
- function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
- begin
- if (Size = 0) then
- Result := nil
- else
- Result := PUCA_PropItemContextTreeNodeRec(
- PtrUInt(
- PtrUInt(@Self) + SizeOf(UInt24){Size}
- )
- );
- end;
- function CompareCodePoints(
- A : PUInt24; LA : Integer;
- B : PUInt24; LB : Integer
- ) : Integer;
- var
- i, hb : Integer;
- begin
- if (A = B) then
- exit(0);
- Result := 1;
- hb := LB - 1;
- for i := 0 to LA - 1 do begin
- if (i > hb) then
- exit;
- if (A[i] < B[i]) then
- exit(-1);
- if (A[i] > B[i]) then
- exit(1);
- end;
- if (LA = LB) then
- exit(0);
- exit(-1);
- end;
- function TUCA_PropItemContextTreeRec.Find(
- const AChars : PUInt24;
- const ACharCount : Integer;
- out ANode : PUCA_PropItemContextTreeNodeRec
- ) : Boolean;
- var
- t : PUCA_PropItemContextTreeNodeRec;
- begin
- t := Data;
- while (t <> nil) do begin
- case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
- 0 : Break;
- -1 : t := t^.GetLeftNode();
- else
- t := t^.GetRightNode();
- end;
- end;
- Result := (t <> nil);
- if Result then
- ANode := t;
- end;
- { TUC_Prop }
- function TUC_Prop.GetCategory: Byte;
- begin
- Result := Byte((C and Byte($F8)) shr 3);
- end;
- function TUC_Prop.GetNumericValue: Double;
- begin
- Result := UC_NUMERIC_ARRAY[NumericIndex];
- end;
- function TUC_Prop.GetUnifiedIdeograph : Boolean;
- begin
- Result := IsBitON(C,2);
- end;
- procedure TUC_Prop.SetCategory(AValue: Byte);
- begin
- C := Byte(C or Byte(AValue shl 3));
- end;
- function TUC_Prop.GetWhiteSpace: Boolean;
- begin
- Result := IsBitON(C,0);
- end;
- procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
- begin
- SetBit(C,0,AValue);
- end;
- function TUC_Prop.GetHangulSyllable: Boolean;
- begin
- Result := IsBitON(C,1);
- end;
- procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
- begin
- SetBit(C,1,AValue);
- end;
- { TUCA_DataBook }
- function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
- begin
- Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
- (AWeight^.Weights[0] <= Self.VariableHighLimit);
- end;
- { TUCA_PropItemRec }
- function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
- end;
- function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
- end;
- function TUCA_PropItemRec.GetCodePoint() : UInt24;
- begin
- if HasCodePoint() then begin
- if Contextual then
- Result := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(
- PUInt24(
- PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
- Cardinal(GetContext()^.Size)
- )^
- )
- else
- Result := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(
- PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
- )
- end else begin
- {$ifdef uni_debug}
- raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
- {$else uni_debug}
- Result := ZERO_UINT24;
- {$endif uni_debug}
- end
- end;
- function TUCA_PropItemRec.HasCodePoint() : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_CODEPOINT);
- end;
- function TUCA_PropItemRec.IsValid() : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_VALID);
- end;
- {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
- begin
- Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
- end;}
- procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
- var
- c : Integer;
- p : PByte;
- pd : PUCA_PropWeights;
- begin
- c := WeightLength;
- p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
- pd := ADest;
- pd^.Weights[0] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
- p := p + 2;
- if not IsWeightCompress_1() then begin
- pd^.Weights[1] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
- p := p + 2;
- end else begin
- pd^.Weights[1] := p^;
- p := p + 1;
- end;
- if not IsWeightCompress_2() then begin
- pd^.Weights[2] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
- p := p + 2;
- end else begin
- pd^.Weights[2] := p^;
- p := p + 1;
- end;
- if (c > 1) then
- Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
- end;
- function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
- begin
- Result := SizeOf(TUCA_PropItemRec);
- if (WeightLength > 0) then begin
- Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
- if IsWeightCompress_1() then
- Result := Result - 1;
- if IsWeightCompress_2() then
- Result := Result - 1;
- end;
- if HasCodePoint() then
- Result := Result + SizeOf(UInt24);
- if Contextual then
- Result := Result + Cardinal(GetContext()^.Size);
- end;
- function TUCA_PropItemRec.GetContextual: Boolean;
- begin
- Result := IsBitON(Flags,FLAG_CONTEXTUAL);
- end;
- function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
- var
- p : PtrUInt;
- begin
- if not Contextual then
- exit(nil);
- p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
- if IsBitON(Flags,FLAG_CODEPOINT) then
- p := p + SizeOf(UInt24);
- Result := PUCA_PropItemContextTreeRec(p);
- end;
- function TUCA_PropItemRec.IsDeleted() : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_DELETION);
- end;
- function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
- var
- i : Cardinal;
- begin
- if (ABook^.BMP_Table2 = nil) then
- exit(nil);
- i := PUInt24(
- PtrUInt(ABook^.BMP_Table2) +
- ( ((ABook^.BMP_Table1[Hi(Word(AChar))] * 256) + Lo(Word(AChar))) *
- SizeOf(UInt24)
- )
- )^;
- {i := ABook^.BMP_Table2[
- (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
- Lo(Word(AChar))
- ];}
- if (i > 0) then
- Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
- else
- Result := nil;
- end;
- function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
- var
- i : Cardinal;
- begin
- if (ABook^.OBMP_Table2 = nil) then
- exit(nil);
- i := PUInt24(
- PtrUInt(ABook^.OBMP_Table2) +
- ( (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
- Word(ALowS) - LOW_SURROGATE_BEGIN
- ) *
- SizeOf(UInt24)
- )^;
- {i := ABook^.OBMP_Table2[
- (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
- Word(ALowS) - LOW_SURROGATE_BEGIN
- ]; }
- if (i > 0) then
- Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
- else
- Result := nil;
- end;
- {$UNDEF UNI_BUILD_TIME}
- {$include weight_derivation.inc}
- function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
- var
- bb : TUCASortKey;
- begin
- SetLength(bb,Length(B));
- if (Length(bb) > 0) then
- Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
- Result := CompareSortKey(A,bb);
- end;
- function CompareSortKey(const A, B : TUCASortKey) : Integer;
- var
- i, hb : Integer;
- begin
- if (Pointer(A) = Pointer(B)) then
- exit(0);
- Result := 1;
- hb := Length(B) - 1;
- for i := 0 to Length(A) - 1 do begin
- if (i > hb) then
- exit;
- if (A[i] < B[i]) then
- exit(-1);
- if (A[i] > B[i]) then
- exit(1);
- end;
- if (Length(A) = Length(B)) then
- exit(0);
- exit(-1);
- end;
- type
- TUCA_PropWeightsArray = array of TUCA_PropWeights;
- function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
- var
- r : TUCASortKey;
- i, c, k, ral, levelCount : Integer;
- pce : PUCA_PropWeights;
- begin
- c := Length(ACEList);
- if (c = 0) then
- exit(nil);
- levelCount := Length(ACEList[0].Weights);
- if (ACollation^.ComparisonStrength > 0) and
- (ACollation^.ComparisonStrength < levelCount)
- then begin
- levelCount := ACollation^.ComparisonStrength;
- end;
- SetLength(r,(levelCount*c + levelCount));
- ral := 0;
- for i := 0 to levelCount - 1 do begin
- if not ACollation^.Backwards[i] then begin
- pce := @ACEList[0];
- for k := 0 to c - 1 do begin
- if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
- r[ral] := pce^.Weights[i];
- ral := ral + 1;
- end;
- pce := pce + 1;
- end;
- end else begin
- pce := @ACEList[c-1];
- for k := 0 to c - 1 do begin
- if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
- r[ral] := pce^.Weights[i];
- ral := ral + 1;
- end;
- pce := pce - 1;
- end;
- end;
- r[ral] := 0;
- ral := ral + 1;
- end;
- ral := ral - 1;
- SetLength(r,ral);
- Result := r;
- end;
- function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
- var
- r : TUCASortKey;
- i, c, k, ral, levelCount : Integer;
- pce : PUCA_PropWeights;
- begin
- c := Length(ACEList);
- if (c = 0) then
- exit(nil);
- levelCount := Length(ACEList[0].Weights);
- if (ACollation^.ComparisonStrength > 0) and
- (ACollation^.ComparisonStrength < levelCount)
- then begin
- levelCount := ACollation^.ComparisonStrength;
- end;
- SetLength(r,(levelCount*c + levelCount));
- ral := 0;
- for i := 0 to levelCount - 1 do begin
- if not ACollation^.Backwards[i] then begin
- pce := @ACEList[0];
- for k := 0 to c - 1 do begin
- if (pce^.Weights[i] <> 0) then begin
- r[ral] := pce^.Weights[i];
- ral := ral + 1;
- end;
- pce := pce + 1;
- end;
- end else begin
- pce := @ACEList[c-1];
- for k := 0 to c - 1 do begin
- if (pce^.Weights[i] <> 0) then begin
- r[ral] := pce^.Weights[i];
- ral := ral + 1;
- end;
- pce := pce - 1;
- end;
- end;
- r[ral] := 0;
- ral := ral + 1;
- end;
- ral := ral - 1;
- SetLength(r,ral);
- Result := r;
- end;
- function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
- var
- r : TUCASortKey;
- i, c, k, ral, levelCount : Integer;
- pce : PUCA_PropWeights;
- variableState : Boolean;
- begin
- c := Length(ACEList);
- if (c = 0) then
- exit(nil);
- levelCount := Length(ACEList[0].Weights);
- if (ACollation^.ComparisonStrength > 0) and
- (ACollation^.ComparisonStrength < levelCount)
- then begin
- levelCount := ACollation^.ComparisonStrength;
- end;
- SetLength(r,(levelCount*c + levelCount));
- ral := 0;
- variableState := False;
- for i := 0 to levelCount - 1 do begin
- if not ACollation^.Backwards[i] then begin
- variableState := False;
- pce := @ACEList[0];
- for k := 0 to c - 1 do begin
- if not ACollation^.IsVariable(pce) then begin
- if (pce^.Weights[0] <> 0) then
- variableState := False;
- if (pce^.Weights[i] <> 0) and not(variableState) then begin
- r[ral] := pce^.Weights[i];
- ral := ral + 1;
- end;
- end else begin
- variableState := True;
- end;
- pce := pce + 1;
- end;
- end else begin
- pce := @ACEList[c-1];
- for k := 0 to c - 1 do begin
- if not ACollation^.IsVariable(pce) then begin
- if (pce^.Weights[0] <> 0) then
- variableState := False;
- if (pce^.Weights[i] <> 0) and not(variableState) then begin
- r[ral] := pce^.Weights[i];
- ral := ral + 1;
- end;
- end else begin
- variableState := True;
- end;
- pce := pce - 1;
- end;
- end;
- r[ral] := 0;
- ral := ral + 1;
- end;
- ral := ral - 1;
- SetLength(r,ral);
- Result := r;
- end;
- function FormKeyShiftedTrimmed(
- const ACEList : TUCA_PropWeightsArray;
- const ACollation : PUCA_DataBook
- ) : TUCASortKey;
- var
- i : Integer;
- p : ^TUCASortKeyItem;
- begin
- Result := FormKeyShifted(ACEList,ACollation);
- i := Length(Result) - 1;
- if (i >= 0) then begin
- p := @Result[i];
- while (i >= 0) do begin
- if (p^ <> $FFFF) then
- Break;
- Dec(i);
- Dec(p);
- end;
- if ((i+1) < Length(Result)) then
- SetLength(Result,(i+1));
- end;
- end;
- function FindChild(
- const ACodePoint : Cardinal;
- const AParent : PUCA_PropItemRec
- ) : PUCA_PropItemRec;inline;
- var
- k : Integer;
- begin
- Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
- for k := 0 to AParent^.ChildCount - 1 do begin
- if (ACodePoint = Result^.CodePoint) then
- exit;
- Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
- end;
- Result := nil;
- end;
- function ComputeSortKey(
- const AString : UnicodeString;
- const ACollation : PUCA_DataBook
- ) : TUCASortKey;
- begin
- Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
- end;
- function ComputeRawSortKey(
- const AStr : PUnicodeChar;
- const ALength : SizeInt;
- const ACollation : PUCA_DataBook
- ) : TUCA_PropWeightsArray;
- var
- r : TUCA_PropWeightsArray;
- ral {used length of "r"}: Integer;
- rl {capacity of "r"} : Integer;
- procedure GrowKey(const AMinGrow : Integer = 0);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if (rl < AMinGrow) then
- rl := rl + AMinGrow
- else
- rl := 2 * rl;
- SetLength(r,rl);
- end;
- var
- i : Integer;
- s : UnicodeString;
- psBase : PUnicodeChar;
- ps : PUnicodeChar;
- cp : Cardinal;
- cl : PUCA_DataBook;
- pp : PUCA_PropItemRec;
- ppLevel : Byte;
- removedCharIndex : array of DWord;
- removedCharIndexLength : DWord;
- locHistory : array[0..24] of record
- i : Integer;
- cl : PUCA_DataBook;
- pp : PUCA_PropItemRec;
- ppLevel : Byte;
- cp : Cardinal;
- removedCharIndexLength : DWord;
- end;
- locHistoryTop : Integer;
- suppressState : record
- cl : PUCA_DataBook;
- CharCount : Integer;
- end;
- LastKeyOwner : record
- Length : Integer;
- Chars : array[0..24] of UInt24;
- end;
- procedure SaveKeyOwner();
- var
- k : Integer;
- kppLevel : Byte;
- begin
- k := 0;
- kppLevel := High(Byte);
- while (k <= locHistoryTop) do begin
- if (kppLevel <> locHistory[k].ppLevel) then begin
- LastKeyOwner.Chars[k] := locHistory[k].cp;
- kppLevel := locHistory[k].ppLevel;
- end;
- k := k + 1;
- end;
- if (k = 0) or (kppLevel <> ppLevel) then begin
- LastKeyOwner.Chars[k] := cp;
- k := k + 1;
- end;
- LastKeyOwner.Length := k;
- end;
- procedure AddWeights(AItem : PUCA_PropItemRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- SaveKeyOwner();
- if ((ral + AItem^.WeightLength) > rl) then
- GrowKey(AItem^.WeightLength);
- AItem^.GetWeightArray(@r[ral]);
- ral := ral + AItem^.WeightLength;
- end;
- procedure AddContextWeights(AItem : PUCA_PropItemContextRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if ((ral + AItem^.WeightCount) > rl) then
- GrowKey(AItem^.WeightCount);
- Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
- ral := ral + AItem^.WeightCount;
- end;
- procedure AddComputedWeights(ACodePoint : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- SaveKeyOwner();
- if ((ral + 2) > rl) then
- GrowKey();
- DeriveWeight(ACodePoint,@r[ral]);
- ral := ral + 2;
- end;
- procedure RecordDeletion();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
- if (suppressState.cl = nil) or
- (suppressState.CharCount > ppLevel)
- then begin
- suppressState.cl := cl;
- suppressState.CharCount := ppLevel;
- end;
- end;
- end;
- procedure RecordStep();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Inc(locHistoryTop);
- locHistory[locHistoryTop].i := i;
- locHistory[locHistoryTop].cl := cl;
- locHistory[locHistoryTop].pp := pp;
- locHistory[locHistoryTop].ppLevel := ppLevel;
- locHistory[locHistoryTop].cp := cp;
- locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
- RecordDeletion();
- end;
- procedure ClearHistory();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- locHistoryTop := -1;
- end;
- function HasHistory() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := (locHistoryTop >= 0);
- end;
- function GetHistoryLength() : Integer;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := (locHistoryTop + 1);
- end;
- procedure GoBack();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Assert(locHistoryTop >= 0);
- i := locHistory[locHistoryTop].i;
- cp := locHistory[locHistoryTop].cp;
- cl := locHistory[locHistoryTop].cl;
- pp := locHistory[locHistoryTop].pp;
- ppLevel := locHistory[locHistoryTop].ppLevel;
- removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
- ps := psBase + (i-1);
- Dec(locHistoryTop);
- end;
- var
- c : Integer;
- lastUnblockedNonstarterCCC : Byte;
- function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
- var
- k : DWord;
- pk : PUnicodeChar;
- puk : PUC_Prop;
- begin
- k := AStartFrom;
- if (k > c) then
- exit(False);
- if (removedCharIndexLength>0) and
- (IndexInArrayDWord(removedCharIndex,k) >= 0)
- then begin
- exit(False);
- end;
- {if (k = (i+1)) or
- ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
- then
- lastUnblockedNonstarterCCC := 0;}
- pk := psBase + k-1;
- if UnicodeIsHighSurrogate(pk^) then begin
- if (k = c) then
- exit(False);
- if UnicodeIsLowSurrogate(pk[1]) then
- puk := GetProps(pk[0],pk[1])
- else
- puk := GetProps(Word(pk^));
- end else begin
- puk := GetProps(Word(pk^));
- end;
- if (puk^.C3 = 0) or (lastUnblockedNonstarterCCC >= puk^.C3) then
- exit(False);
- lastUnblockedNonstarterCCC := puk^.C3;
- Result := True;
- end;
- procedure RemoveChar(APos : Integer);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if (removedCharIndexLength >= Length(removedCharIndex)) then
- SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
- removedCharIndex[removedCharIndexLength] := APos;
- Inc(removedCharIndexLength);
- if UnicodeIsHighSurrogate(psBase[APos]) and (APos < c) and UnicodeIsLowSurrogate(psBase[APos+1]) then begin
- if (removedCharIndexLength >= Length(removedCharIndex)) then
- SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
- removedCharIndex[removedCharIndexLength] := APos+1;
- Inc(removedCharIndexLength);
- end;
- end;
- procedure Inc_I();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if (removedCharIndexLength = 0) then begin
- Inc(i);
- Inc(ps);
- exit;
- end;
- while True do begin
- Inc(i);
- Inc(ps);
- if (IndexInArrayDWord(removedCharIndex,i) = -1) then
- Break;
- end;
- end;
- var
- surrogateState : Boolean;
- function MoveToNextChar() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := True;
- if UnicodeIsHighSurrogate(ps[0]) then begin
- if (i = c) then
- exit(False);
- if UnicodeIsLowSurrogate(ps[1]) then begin
- surrogateState := True;
- cp := ToUCS4(ps[0],ps[1]);
- end else begin
- surrogateState := False;
- cp := Word(ps[0]);
- end;
- end else begin
- surrogateState := False;
- cp := Word(ps[0]);
- end;
- end;
- procedure ClearPP(const AClearSuppressInfo : Boolean = True);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- cl := nil;
- pp := nil;
- ppLevel := 0;
- if AClearSuppressInfo then begin
- suppressState.cl := nil;
- suppressState.CharCount := 0;
- end;
- end;
- function FindPropUCA() : Boolean;
- var
- candidateCL : PUCA_DataBook;
- begin
- pp := nil;
- if (cl = nil) then
- candidateCL := ACollation
- else
- candidateCL := cl;
- if surrogateState then begin
- while (candidateCL <> nil) do begin
- pp := GetPropUCA(ps[0],ps[1],candidateCL);
- if (pp <> nil) then
- break;
- candidateCL := candidateCL^.Base;
- end;
- end else begin
- while (candidateCL <> nil) do begin
- pp := GetPropUCA(ps[0],candidateCL);
- if (pp <> nil) then
- break;
- candidateCL := candidateCL^.Base;
- end;
- end;
- cl := candidateCL;
- Result := (pp <> nil);
- end;
- procedure AddWeightsAndClear();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- var
- ctxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- if (pp^.WeightLength > 0) then begin
- AddWeights(pp);
- end else
- if (LastKeyOwner.Length > 0) and pp^.Contextual and
- pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then begin
- AddContextWeights(@ctxNode^.Data);
- end;
- //AddWeights(pp);
- ClearHistory();
- ClearPP();
- end;
- procedure StartMatch();
- procedure HandleLastChar();
- var
- ctxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- while True do begin
- if pp^.IsValid() then begin
- if (pp^.WeightLength > 0) then
- AddWeights(pp)
- else
- if (LastKeyOwner.Length > 0) and pp^.Contextual and
- pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then
- AddContextWeights(@ctxNode^.Data)
- else
- AddComputedWeights(cp){handle deletion of code point};
- break;
- end;
- if (cl^.Base = nil) then begin
- AddComputedWeights(cp);
- break;
- end;
- cl := cl^.Base;
- if not FindPropUCA() then begin
- AddComputedWeights(cp);
- break;
- end;
- end;
- end;
- var
- tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- ppLevel := 0;
- if not FindPropUCA() then begin
- AddComputedWeights(cp);
- ClearHistory();
- ClearPP();
- end else begin
- if (i = c) then begin
- HandleLastChar();
- end else begin
- if pp^.IsValid()then begin
- if (pp^.ChildCount = 0) then begin
- if (pp^.WeightLength > 0) then
- AddWeights(pp)
- else
- if (LastKeyOwner.Length > 0) and pp^.Contextual and
- pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
- (tmpCtxNode^.Data.WeightCount > 0)
- then
- AddContextWeights(@tmpCtxNode^.Data)
- else
- AddComputedWeights(cp){handle deletion of code point};
- ClearPP();
- ClearHistory();
- end else begin
- RecordStep();
- end
- end else begin
- if (pp^.ChildCount = 0) then begin
- AddComputedWeights(cp);
- ClearPP();
- ClearHistory();
- end else begin
- RecordStep();
- end;
- end ;
- end;
- end;
- end;
- function TryPermutation() : Boolean;
- var
- kk, kkidx : Integer;
- b : Boolean;
- puk : PUC_Prop;
- ppk : PUCA_PropItemRec;
- begin
- Result := False;
- puk := GetProps(cp);
- if (puk^.C3 = 0) then
- exit;
- lastUnblockedNonstarterCCC := puk^.C3;
- if surrogateState then
- kk := i + 2
- else
- kk := i + 1;
- while IsUnblockedNonstarter(kk) do begin
- kkidx := kk-1;
- b := UnicodeIsHighSurrogate(psBase[kkidx]) and (kk<c) and UnicodeIsLowSurrogate(psBase[kkidx+1]);
- if b then
- ppk := FindChild(ToUCS4(psBase[kkidx],psBase[kkidx+1]),pp)
- else
- ppk := FindChild(Word(psBase[kkidx]),pp);
- if (ppk <> nil) then begin
- pp := ppk;
- RemoveChar(kk);
- Inc(ppLevel);
- RecordStep();
- Result := True;
- if (pp^.ChildCount = 0 ) then
- Break;
- end;
- if b then
- Inc(kk);
- Inc(kk);
- end;
- end;
- procedure AdvanceCharPos();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
- Inc(i);
- Inc(ps);
- end;
- Inc_I();
- end;
- var
- ok : Boolean;
- pp1 : PUCA_PropItemRec;
- cltemp : PUCA_DataBook;
- ctxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- if (ALength = 0) then
- exit(nil);
- s := '';
- if ACollation^.NoNormalization then begin
- psBase := AStr;
- c := ALength;
- end else begin
- s := NormalizeNFD(AStr,ALength);
- c := Length(s);
- psBase := @s[1];
- end;
- rl := 3*c;
- SetLength(r,rl);
- ral := 0;
- ps := psBase;
- ClearPP();
- locHistoryTop := -1;
- removedCharIndexLength := 0;
- FillChar(suppressState,SizeOf(suppressState),#0);
- LastKeyOwner.Length := 0;
- i := 1;
- while (i <= c) and MoveToNextChar() do begin
- if (pp = nil) then begin // Start Matching
- StartMatch();
- end else begin
- pp1 := FindChild(cp,pp);
- if (pp1 <> nil) then begin
- Inc(ppLevel);
- pp := pp1;
- if (pp^.ChildCount = 0) or (i = c) then begin
- ok := False;
- if pp^.IsValid() and (suppressState.CharCount = 0) then begin
- if (pp^.WeightLength > 0) then begin
- AddWeightsAndClear();
- ok := True;
- end else
- if (LastKeyOwner.Length > 0) and pp^.Contextual and
- pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then begin
- AddContextWeights(@ctxNode^.Data);
- ClearHistory();
- ClearPP();
- ok := True;
- end
- end;
- if not ok then begin
- RecordDeletion();
- ok := False;
- while HasHistory() do begin
- GoBack();
- if pp^.IsValid() and
- ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
- ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
- )
- then begin
- AddWeightsAndClear();
- ok := True;
- Break;
- end;
- end;
- if not ok then begin
- cltemp := cl^.Base;
- if (cltemp <> nil) then begin
- ClearPP(False);
- cl := cltemp;
- Continue;
- end;
- end;
- if not ok then begin
- AddComputedWeights(cp);
- ClearHistory();
- ClearPP();
- end;
- end;
- end else begin
- RecordStep();
- end;
- end else begin
- // permutations !
- ok := False;
- if TryPermutation() and pp^.IsValid() then begin
- if (suppressState.CharCount = 0) then begin
- AddWeightsAndClear();
- Continue;
- end;
- while True do begin
- if pp^.IsValid() and
- (pp^.WeightLength > 0) and
- ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
- ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
- )
- then begin
- AddWeightsAndClear();
- ok := True;
- break;
- end;
- if not HasHistory() then
- break;
- GoBack();
- if (pp = nil) then
- break;
- end;
- end;
- if not ok then begin
- if pp^.IsValid() and (suppressState.CharCount = 0) then begin
- if (pp^.WeightLength > 0) then begin
- AddWeightsAndClear();
- ok := True;
- end else
- if (LastKeyOwner.Length > 0) and pp^.Contextual and
- pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then begin
- AddContextWeights(@ctxNode^.Data);
- ClearHistory();
- ClearPP();
- ok := True;
- end
- end;
- if ok then
- Continue;
- end;
- if not ok then begin
- if (cl^.Base <> nil) then begin
- cltemp := cl^.Base;
- while HasHistory() do
- GoBack();
- pp := nil;
- ppLevel := 0;
- cl := cltemp;
- Continue;
- end;
- //walk back
- ok := False;
- while HasHistory() do begin
- GoBack();
- if pp^.IsValid() and
- (pp^.WeightLength > 0) and
- ( (suppressState.CharCount = 0) or
- ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
- ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
- )
- )
- then begin
- AddWeightsAndClear();
- ok := True;
- Break;
- end;
- end;
- if ok then begin
- AdvanceCharPos();
- Continue;
- end;
- if (pp <> nil) then begin
- AddComputedWeights(cp);
- ClearHistory();
- ClearPP();
- end;
- end;
- end;
- end;
- if surrogateState then begin
- Inc(ps);
- Inc(i);
- end;
- //
- Inc_I();
- end;
- SetLength(r,ral);
- Result := r;
- end;
- type
- TComputeKeyContext = record
- Collation : PUCA_DataBook;
- r : TUCA_PropWeightsArray;
- ral {used length of "r"}: Integer;
- rl {capacity of "r"} : Integer;
- i : Integer;
- s : UnicodeString;
- ps : PUnicodeChar;
- cp : Cardinal;
- cl : PUCA_DataBook;
- pp : PUCA_PropItemRec;
- ppLevel : Byte;
- removedCharIndex : array of DWord;
- removedCharIndexLength : DWord;
- locHistoryTop : Integer;
- locHistory : array[0..24] of record
- i : Integer;
- cl : PUCA_DataBook;
- pp : PUCA_PropItemRec;
- ppLevel : Byte;
- cp : Cardinal;
- removedCharIndexLength : DWord;
- end;
- suppressState : record
- cl : PUCA_DataBook;
- CharCount : Integer;
- end;
- LastKeyOwner : record
- Length : Integer;
- Chars : array[0..24] of UInt24;
- end;
- c : Integer;
- lastUnblockedNonstarterCCC : Byte;
- surrogateState : Boolean;
- Finished : Boolean;
- end;
- PComputeKeyContext = ^TComputeKeyContext;
- procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
- begin
- AContext^.cl := nil;
- AContext^.pp := nil;
- AContext^.ppLevel := 0;
- if AClearSuppressInfo then begin
- AContext^.suppressState.cl := nil;
- AContext^.suppressState.CharCount := 0;
- end;
- end;
- procedure InitContext(
- AContext : PComputeKeyContext;
- const AStr : PUnicodeChar;
- const ALength : SizeInt;
- const ACollation : PUCA_DataBook
- );
- begin
- AContext^.Collation := ACollation;
- AContext^.c := ALength;
- AContext^.s := NormalizeNFD(AStr,AContext^.c);
- AContext^.c := Length(AContext^.s);
- AContext^.rl := 3*AContext^.c;
- SetLength(AContext^.r,AContext^.rl);
- AContext^.ral := 0;
- AContext^.ps := @AContext^.s[1];
- ClearPP(AContext);
- AContext^.locHistoryTop := -1;
- AContext^.removedCharIndexLength := 0;
- FillChar(AContext^.suppressState,SizeOf(AContext^.suppressState),#0);
- AContext^.LastKeyOwner.Length := 0;
- AContext^.i := 1;
- AContext^.Finished := False;
- end;
- function FormKey(
- const AWeightArray : TUCA_PropWeightsArray;
- const ACollation : PUCA_DataBook
- ) : TUCASortKey;inline;
- begin
- case ACollation.VariableWeight of
- TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
- TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
- TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
- TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
- else
- Result := FormKeyShifted(AWeightArray,ACollation);
- end;
- end;
- function ComputeRawSortKeyNextItem(
- const AContext : PComputeKeyContext
- ) : Boolean;forward;
- function IncrementalCompareString_NonIgnorable(
- const AStrA : PUnicodeChar;
- const ALengthA : SizeInt;
- const AStrB : PUnicodeChar;
- const ALengthB : SizeInt;
- const ACollation : PUCA_DataBook
- ) : Integer;
- var
- ctxA, ctxB : TComputeKeyContext;
- lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
- keyIndexB : Integer;
- keyA, keyB : TUCASortKey;
- begin
- if ( (ALengthA = 0) and (ALengthB = 0) ) or
- ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
- (ALengthA = ALengthB)
- )
- then
- exit(0);
- if (ALengthA = 0) then
- exit(-1);
- if (ALengthB = 0) then
- exit(1);
- InitContext(@ctxA,AStrA,ALengthA,ACollation);
- InitContext(@ctxB,AStrB,ALengthB,ACollation);
- lastKeyIndexA := -1;
- keyIndexA := -1;
- lengthMaxA := 0;
- keyIndexB := -1;
- while True do begin
- if not ComputeRawSortKeyNextItem(@ctxA) then
- Break;
- if (ctxA.ral = lengthMaxA) then
- Continue;
- lengthMaxA := ctxA.ral;
- keyIndexA := lastKeyIndexA + 1;
- while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
- Inc(keyIndexA);
- end;
- if (keyIndexA = lengthMaxA) then begin
- lastKeyIndexA := keyIndexA-1;
- Continue;
- end;
- while (keyIndexA < lengthMaxA) do begin
- if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
- Inc(keyIndexA);
- Continue;
- end;
- Inc(keyIndexB);
- while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
- if (ctxB.ral <= keyIndexB) then begin
- if not ComputeRawSortKeyNextItem(@ctxB) then
- Break;
- Continue;
- end;
- Inc(keyIndexB);
- end;
- if (ctxB.ral <= keyIndexB) then
- exit(1);
- if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
- exit(1);
- if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
- exit(-1);
- Inc(keyIndexA);
- end;
- lastKeyIndexA := keyIndexA - 1;
- end;
- //Key(A) is completed !
- Inc(keyIndexB);
- while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
- if (ctxB.ral <= keyIndexB) then begin
- if not ComputeRawSortKeyNextItem(@ctxB) then
- Break;
- Continue;
- end;
- Inc(keyIndexB);
- end;
- if (ctxB.ral > keyIndexB) then begin
- //B has at least one more primary weight that A
- exit(-1);
- end;
- while ComputeRawSortKeyNextItem(@ctxB) do begin
- //
- end;
- //Key(B) is completed !
- keyA := FormKey(ctxA.r,ctxA.Collation);
- keyB := FormKey(ctxB.r,ctxB.Collation);
- Result := CompareSortKey(keyA,keyB);
- end;
- function IncrementalCompareString_Shift(
- const AStrA : PUnicodeChar;
- const ALengthA : SizeInt;
- const AStrB : PUnicodeChar;
- const ALengthB : SizeInt;
- const ACollation : PUCA_DataBook
- ) : Integer;
- var
- ctxA, ctxB : TComputeKeyContext;
- lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
- keyIndexB : Integer;
- keyA, keyB : TUCASortKey;
- begin
- if ( (ALengthA = 0) and (ALengthB = 0) ) or
- ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
- (ALengthA = ALengthB)
- )
- then
- exit(0);
- if (ALengthA = 0) then
- exit(-1);
- if (ALengthB = 0) then
- exit(1);
- InitContext(@ctxA,AStrA,ALengthA,ACollation);
- InitContext(@ctxB,AStrB,ALengthB,ACollation);
- lastKeyIndexA := -1;
- keyIndexA := -1;
- lengthMaxA := 0;
- keyIndexB := -1;
- while True do begin
- if not ComputeRawSortKeyNextItem(@ctxA) then
- Break;
- if (ctxA.ral = lengthMaxA) then
- Continue;
- lengthMaxA := ctxA.ral;
- keyIndexA := lastKeyIndexA + 1;
- while (keyIndexA < lengthMaxA) and
- ( (ctxA.r[keyIndexA].Weights[0] = 0) or
- ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
- )
- do begin
- Inc(keyIndexA);
- end;
- if (keyIndexA = lengthMaxA) then begin
- lastKeyIndexA := keyIndexA-1;
- Continue;
- end;
- while (keyIndexA < lengthMaxA) do begin
- if (ctxA.r[keyIndexA].Weights[0] = 0) or
- ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
- then begin
- Inc(keyIndexA);
- Continue;
- end;
- Inc(keyIndexB);
- while (ctxB.ral <= keyIndexB) or
- (ctxB.r[keyIndexB].Weights[0] = 0) or
- ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
- do begin
- if (ctxB.ral <= keyIndexB) then begin
- if not ComputeRawSortKeyNextItem(@ctxB) then
- Break;
- Continue;
- end;
- Inc(keyIndexB);
- end;
- if (ctxB.ral <= keyIndexB) then
- exit(1);
- if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
- exit(1);
- if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
- exit(-1);
- Inc(keyIndexA);
- end;
- lastKeyIndexA := keyIndexA - 1;
- end;
- //Key(A) is completed !
- Inc(keyIndexB);
- while (ctxB.ral <= keyIndexB) or
- (ctxB.r[keyIndexB].Weights[0] = 0) or
- ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
- do begin
- if (ctxB.ral <= keyIndexB) then begin
- if not ComputeRawSortKeyNextItem(@ctxB) then
- Break;
- Continue;
- end;
- Inc(keyIndexB);
- end;
- if (ctxB.ral > keyIndexB) then begin
- //B has at least one more primary weight that A
- exit(-1);
- end;
- while ComputeRawSortKeyNextItem(@ctxB) do begin
- //
- end;
- //Key(B) is completed !
- keyA := FormKey(ctxA.r,ctxA.Collation);
- keyB := FormKey(ctxB.r,ctxB.Collation);
- Result := CompareSortKey(keyA,keyB);
- end;
- function IncrementalCompareString(
- const AStrA : PUnicodeChar;
- const ALengthA : SizeInt;
- const AStrB : PUnicodeChar;
- const ALengthB : SizeInt;
- const ACollation : PUCA_DataBook
- ) : Integer;
- begin
- case ACollation^.VariableWeight of
- TUCA_VariableKind.ucaNonIgnorable :
- begin
- Result := IncrementalCompareString_NonIgnorable(
- AStrA,ALengthA,AStrB,ALengthB,ACollation
- );
- end;
- TUCA_VariableKind.ucaBlanked,
- TUCA_VariableKind.ucaShiftedTrimmed,
- TUCA_VariableKind.ucaIgnoreSP,
- TUCA_VariableKind.ucaShifted:
- begin
- Result := IncrementalCompareString_Shift(
- AStrA,ALengthA,AStrB,ALengthB,ACollation
- );
- end;
- else
- begin
- Result := IncrementalCompareString_Shift(
- AStrA,ALengthA,AStrB,ALengthB,ACollation
- );
- end;
- end;
- end;
- function IncrementalCompareString(
- const AStrA,
- AStrB : UnicodeString;
- const ACollation : PUCA_DataBook
- ) : Integer;
- begin
- Result := IncrementalCompareString(
- Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
- ACollation
- );
- end;
- function FilterString(
- const AStr : PUnicodeChar;
- const ALength : SizeInt;
- const AExcludedMask : TCategoryMask
- ) : UnicodeString;
- var
- i, c : SizeInt;
- pp, pr : PUnicodeChar;
- pu : PUC_Prop;
- locIsSurrogate : Boolean;
- begin
- c := ALength;
- SetLength(Result,(2*c));
- if (c > 0) then begin
- pp := AStr;
- pr := @Result[1];
- i := 1;
- while (i <= c) do begin
- pu := GetProps(Word(pp^));
- locIsSurrogate := (pu^.Category = UGC_Surrogate);
- if locIsSurrogate then begin
- if (i = c) then
- Break;
- if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
- Inc(pp);
- Inc(i);
- Continue;
- end;
- pu := GetProps(pp[0],pp[1]);
- end;
- if not(pu^.Category in AExcludedMask) then begin
- pr^ := pp^;
- Inc(pr);
- if locIsSurrogate then begin
- Inc(pp);
- Inc(pr);
- Inc(i);
- pr^ := pp^;
- end;
- end;
- Inc(pp);
- Inc(i);
- end;
- i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
- SetLength(Result,i);
- end;
- end;
- function FilterString(
- const AStr : UnicodeString;
- const AExcludedMask : TCategoryMask
- ) : UnicodeString;
- begin
- if (AStr = '') then
- Result := ''
- else
- Result := FilterString(@AStr[1],Length(AStr),AExcludedMask);
- end;
- function ComputeRawSortKeyNextItem(
- const AContext : PComputeKeyContext
- ) : Boolean;
- var
- ctx : PComputeKeyContext;
- procedure GrowKey(const AMinGrow : Integer = 0);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if (ctx^.rl < AMinGrow) then
- ctx^.rl := ctx^.rl + AMinGrow
- else
- ctx^.rl := 2 * ctx^.rl;
- SetLength(ctx^.r,ctx^.rl);
- end;
- procedure SaveKeyOwner();
- var
- k : Integer;
- kppLevel : Byte;
- begin
- k := 0;
- kppLevel := High(Byte);
- while (k <= ctx^.locHistoryTop) do begin
- if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
- ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
- kppLevel := ctx^.locHistory[k].ppLevel;
- end;
- k := k + 1;
- end;
- if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
- ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
- k := k + 1;
- end;
- ctx^.LastKeyOwner.Length := k;
- end;
- procedure AddWeights(AItem : PUCA_PropItemRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- SaveKeyOwner();
- if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
- GrowKey(AItem^.WeightLength);
- AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
- ctx^.ral := ctx^.ral + AItem^.WeightLength;
- end;
- procedure AddContextWeights(AItem : PUCA_PropItemContextRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
- GrowKey(AItem^.WeightCount);
- Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
- ctx^.ral := ctx^.ral + AItem^.WeightCount;
- end;
- procedure AddComputedWeights(ACodePoint : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- SaveKeyOwner();
- if ((ctx^.ral + 2) > ctx^.rl) then
- GrowKey();
- DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
- ctx^.ral := ctx^.ral + 2;
- end;
- procedure RecordDeletion();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
- if (ctx^.suppressState.cl = nil) or
- (ctx^.suppressState.CharCount > ctx^.ppLevel)
- then begin
- ctx^.suppressState.cl := ctx^.cl;
- ctx^.suppressState.CharCount := ctx^.ppLevel;
- end;
- end;
- end;
- procedure RecordStep();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Inc(ctx^.locHistoryTop);
- ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
- ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
- ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
- ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
- ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
- ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
- RecordDeletion();
- end;
- procedure ClearHistory();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- ctx^.locHistoryTop := -1;
- end;
- function HasHistory() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := (ctx^.locHistoryTop >= 0);
- end;
- function GetHistoryLength() : Integer;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := (ctx^.locHistoryTop + 1);
- end;
- procedure GoBack();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Assert(ctx^.locHistoryTop >= 0);
- ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
- ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
- ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
- ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
- ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
- ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
- ctx^.ps := @ctx^.s[ctx^.i];
- Dec(ctx^.locHistoryTop);
- end;
- function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
- var
- k : DWord;
- pk : PUnicodeChar;
- puk : PUC_Prop;
- begin
- k := AStartFrom;
- if (k > ctx^.c) then
- exit(False);
- if (ctx^.removedCharIndexLength>0) and
- (IndexInArrayDWord(ctx^.removedCharIndex,k) >= 0)
- then begin
- exit(False);
- end;
- {if (k = (i+1)) or
- ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
- then
- lastUnblockedNonstarterCCC := 0;}
- pk := @ctx^.s[k];
- if UnicodeIsHighSurrogate(pk^) then begin
- if (k = ctx^.c) then
- exit(False);
- if UnicodeIsLowSurrogate(pk[1]) then
- puk := GetProps(pk[0],pk[1])
- else
- puk := GetProps(Word(pk^));
- end else begin
- puk := GetProps(Word(pk^));
- end;
- if (puk^.C3 = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.C3) then
- exit(False);
- ctx^.lastUnblockedNonstarterCCC := puk^.C3;
- Result := True;
- end;
- procedure RemoveChar(APos : Integer);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
- SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
- ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
- Inc(ctx^.removedCharIndexLength);
- if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
- if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
- SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
- ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
- Inc(ctx^.removedCharIndexLength);
- end;
- end;
- procedure Inc_I();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if (ctx^.removedCharIndexLength = 0) then begin
- Inc(ctx^.i);
- Inc(ctx^.ps);
- exit;
- end;
- while True do begin
- Inc(ctx^.i);
- Inc(ctx^.ps);
- if (IndexInArrayDWord(ctx^.removedCharIndex,ctx^.i) = -1) then
- Break;
- end;
- end;
- function MoveToNextChar() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- Result := True;
- if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
- if (ctx^.i = ctx^.c) then
- exit(False);
- if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
- ctx^.surrogateState := True;
- ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
- end else begin
- ctx^.surrogateState := False;
- ctx^.cp := Word(ctx^.ps[0]);
- end;
- end else begin
- ctx^.surrogateState := False;
- ctx^.cp := Word(ctx^.ps[0]);
- end;
- end;
- function FindPropUCA() : Boolean;
- var
- candidateCL : PUCA_DataBook;
- begin
- ctx^.pp := nil;
- if (ctx^.cl = nil) then
- candidateCL := ctx^.Collation
- else
- candidateCL := ctx^.cl;
- if ctx^.surrogateState then begin
- while (candidateCL <> nil) do begin
- ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
- if (ctx^.pp <> nil) then
- break;
- candidateCL := candidateCL^.Base;
- end;
- end else begin
- while (candidateCL <> nil) do begin
- ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
- if (ctx^.pp <> nil) then
- break;
- candidateCL := candidateCL^.Base;
- end;
- end;
- ctx^.cl := candidateCL;
- Result := (ctx^.pp <> nil);
- end;
- procedure AddWeightsAndClear();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- var
- ctxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- if (ctx^.pp^.WeightLength > 0) then begin
- AddWeights(ctx^.pp);
- end else
- if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
- ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then begin
- AddContextWeights(@ctxNode^.Data);
- end;
- //AddWeights(pp);
- ClearHistory();
- ClearPP(ctx);
- end;
- function StartMatch() : Boolean;
- procedure HandleLastChar();
- var
- ctxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- while True do begin
- if ctx^.pp^.IsValid() then begin
- if (ctx^.pp^.WeightLength > 0) then
- AddWeights(ctx^.pp)
- else
- if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
- ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then
- AddContextWeights(@ctxNode^.Data)
- else
- AddComputedWeights(ctx^.cp){handle deletion of code point};
- break;
- end;
- if (ctx^.cl^.Base = nil) then begin
- AddComputedWeights(ctx^.cp);
- break;
- end;
- ctx^.cl := ctx^.cl^.Base;
- if not FindPropUCA() then begin
- AddComputedWeights(ctx^.cp);
- break;
- end;
- end;
- end;
- var
- tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- Result := False;
- ctx^.ppLevel := 0;
- if not FindPropUCA() then begin
- AddComputedWeights(ctx^.cp);
- ClearHistory();
- ClearPP(ctx);
- Result := True;
- end else begin
- if (ctx^.i = ctx^.c) then begin
- HandleLastChar();
- Result := True;
- end else begin
- if ctx^.pp^.IsValid()then begin
- if (ctx^.pp^.ChildCount = 0) then begin
- if (ctx^.pp^.WeightLength > 0) then
- AddWeights(ctx^.pp)
- else
- if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
- ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
- (tmpCtxNode^.Data.WeightCount > 0)
- then
- AddContextWeights(@tmpCtxNode^.Data)
- else
- AddComputedWeights(ctx^.cp){handle deletion of code point};
- ClearPP(ctx);
- ClearHistory();
- Result := True;
- end else begin
- RecordStep();
- end
- end else begin
- if (ctx^.pp^.ChildCount = 0) then begin
- AddComputedWeights(ctx^.cp);
- ClearPP(ctx);
- ClearHistory();
- Result := True;
- end else begin
- RecordStep();
- end;
- end;
- end;
- end;
- end;
- function TryPermutation() : Boolean;
- var
- kk : Integer;
- b : Boolean;
- puk : PUC_Prop;
- ppk : PUCA_PropItemRec;
- begin
- Result := False;
- puk := GetProps(ctx^.cp);
- if (puk^.C3 = 0) then
- exit;
- ctx^.lastUnblockedNonstarterCCC := puk^.C3;
- if ctx^.surrogateState then
- kk := ctx^.i + 2
- else
- kk := ctx^.i + 1;
- while IsUnblockedNonstarter(kk) do begin
- b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
- if b then
- ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
- else
- ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
- if (ppk <> nil) then begin
- ctx^.pp := ppk;
- RemoveChar(kk);
- Inc(ctx^.ppLevel);
- RecordStep();
- Result := True;
- if (ctx^.pp^.ChildCount = 0 ) then
- Break;
- end;
- if b then
- Inc(kk);
- Inc(kk);
- end;
- end;
- procedure AdvanceCharPos();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
- begin
- if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
- Inc(ctx^.i);
- Inc(ctx^.ps);
- end;
- Inc_I();
- end;
- var
- ok : Boolean;
- pp1 : PUCA_PropItemRec;
- cltemp : PUCA_DataBook;
- ctxNode : PUCA_PropItemContextTreeNodeRec;
- begin
- if AContext^.Finished then
- exit(False);
- ctx := AContext;
- while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
- ok := False;
- if (ctx^.pp = nil) then begin // Start Matching
- ok := StartMatch();
- end else begin
- pp1 := FindChild(ctx^.cp,ctx^.pp);
- if (pp1 <> nil) then begin
- Inc(ctx^.ppLevel);
- ctx^.pp := pp1;
- if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
- ok := False;
- if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
- if (ctx^.pp^.WeightLength > 0) then begin
- AddWeightsAndClear();
- ok := True;
- end else
- if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
- ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then begin
- AddContextWeights(@ctxNode^.Data);
- ClearHistory();
- ClearPP(ctx);
- ok := True;
- end
- end;
- if not ok then begin
- RecordDeletion();
- while HasHistory() do begin
- GoBack();
- if ctx^.pp^.IsValid() and
- ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
- ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
- )
- then begin
- AddWeightsAndClear();
- ok := True;
- Break;
- end;
- end;
- if not ok then begin
- cltemp := ctx^.cl^.Base;
- if (cltemp <> nil) then begin
- ClearPP(ctx,False);
- ctx^.cl := cltemp;
- Continue;
- end;
- end;
- if not ok then begin
- AddComputedWeights(ctx^.cp);
- ClearHistory();
- ClearPP(ctx);
- ok := True;
- end;
- end;
- end else begin
- RecordStep();
- end;
- end else begin
- // permutations !
- ok := False;
- if TryPermutation() and ctx^.pp^.IsValid() then begin
- if (ctx^.suppressState.CharCount = 0) then begin
- AddWeightsAndClear();
- //ok := True;
- exit(True);// Continue;
- end;
- while True do begin
- if ctx^.pp^.IsValid() and
- (ctx^.pp^.WeightLength > 0) and
- ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
- ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
- )
- then begin
- AddWeightsAndClear();
- ok := True;
- break;
- end;
- if not HasHistory() then
- break;
- GoBack();
- if (ctx^.pp = nil) then
- break;
- end;
- end;
- if not ok then begin
- if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
- if (ctx^.pp^.WeightLength > 0) then begin
- AddWeightsAndClear();
- ok := True;
- end else
- if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
- ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
- (ctxNode^.Data.WeightCount > 0)
- then begin
- AddContextWeights(@ctxNode^.Data);
- ClearHistory();
- ClearPP(ctx);
- ok := True;
- end
- end;
- if ok then
- exit(True);// Continue;
- end;
- if not ok then begin
- if (ctx^.cl^.Base <> nil) then begin
- cltemp := ctx^.cl^.Base;
- while HasHistory() do
- GoBack();
- ctx^.pp := nil;
- ctx^.ppLevel := 0;
- ctx^.cl := cltemp;
- Continue;
- end;
- //walk back
- ok := False;
- while HasHistory() do begin
- GoBack();
- if ctx^.pp^.IsValid() and
- (ctx^.pp^.WeightLength > 0) and
- ( (ctx^.suppressState.CharCount = 0) or
- ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
- ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
- )
- )
- then begin
- AddWeightsAndClear();
- ok := True;
- Break;
- end;
- end;
- if ok then begin
- AdvanceCharPos();
- exit(True);// Continue;
- end;
- if (ctx^.pp <> nil) then begin
- AddComputedWeights(ctx^.cp);
- ClearHistory();
- ClearPP(ctx);
- ok := True;
- end;
- end;
- end;
- end;
- if ctx^.surrogateState then begin
- Inc(ctx^.ps);
- Inc(ctx^.i);
- end;
- //
- Inc_I();
- if ok then
- exit(True);
- end;
- SetLength(ctx^.r,ctx^.ral);
- ctx^.Finished := True;
- Result := True;
- end;
- function ComputeSortKey(
- const AStr : PUnicodeChar;
- const ALength : SizeInt;
- const ACollation : PUCA_DataBook
- ) : TUCASortKey;
- var
- r : TUCA_PropWeightsArray;
- begin
- r := ComputeRawSortKey(AStr,ALength,ACollation);
- Result := FormKey(r,ACollation);
- end;
- end.
|