12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775477647774778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021 |
- { Unicode parser helper unit.
- Copyright (c) 2012-2015 by Inoussa OUEDRAOGO
- The source code is distributed under the Library GNU
- General Public License with the following modification:
- - object files and libraries linked into an application may be
- distributed without source code.
- If you didn't receive a copy of the file COPYING, contact:
- Free Software Foundation
- 675 Mass Ave
- Cambridge, MA 02139
- USA
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
- unit helper;
- {$mode delphi}
- {$H+}
- {$PACKENUM 1}
- {$pointermath on}
- {$typedaddress on}
- {$warn 4056 off} //Conversion between ordinals and pointers is not portable
- {$macro on}
- {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
- {$define X_PACKED:=}
- {$else FPC_REQUIRES_PROPER_ALIGNMENT}
- {$define X_PACKED:=packed}
- {$endif FPC_REQUIRES_PROPER_ALIGNMENT}
- interface
- uses
- Classes, SysUtils, StrUtils;
- const
- SLicenseText =
- ' { Unicode implementation tables. ' + sLineBreak +
- ' ' + sLineBreak +
- ' Copyright (c) 2013 - 2017 by Inoussa OUEDRAOGO ' + sLineBreak +
- ' ' + sLineBreak +
- ' Permission is hereby granted, free of charge, to any person ' + sLineBreak +
- ' obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
- ' documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
- ' associated documentation (the "Software") to deal in the Data ' + sLineBreak +
- ' Files or Software without restriction, including without ' + sLineBreak +
- ' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
- ' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
- ' and to permit persons to whom the Data Files or Software are ' + sLineBreak +
- ' furnished to do so, provided that (a) the above copyright ' + sLineBreak +
- ' notice(s) and this permission notice appear with all copies ' + sLineBreak +
- ' of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
- ' notice(s) and this permission notice appear in associated ' + sLineBreak +
- ' documentation, and (c) there is clear notice in each modified ' + sLineBreak +
- ' Data File or in the Software as well as in the documentation ' + sLineBreak +
- ' associated with the Data File(s) or Software that the data or ' + sLineBreak +
- ' software has been modified. ' + sLineBreak +
- ' ' + sLineBreak +
- ' ' + sLineBreak +
- ' This program is distributed in the hope that it will be useful, ' + sLineBreak +
- ' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
- ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
- WEIGHT_LEVEL_COUNT = 3;
- type
- // Unicode General Category
- TUnicodeCategory = (
- ucUppercaseLetter, // Lu = Letter, uppercase
- ucLowercaseLetter, // Ll = Letter, lowercase
- ucTitlecaseLetter, // Lt = Letter, titlecase
- ucModifierLetter, // Lm = Letter, modifier
- ucOtherLetter, // Lo = Letter, other
- ucNonSpacingMark, // Mn = Mark, nonspacing
- ucCombiningMark, // Mc = Mark, spacing combining
- ucEnclosingMark, // Me = Mark, enclosing
- ucDecimalNumber, // Nd = Number, decimal digit
- ucLetterNumber, // Nl = Number, letter
- ucOtherNumber, // No = Number, other
- ucConnectPunctuation, // Pc = Punctuation, connector
- ucDashPunctuation, // Pd = Punctuation, dash
- ucOpenPunctuation, // Ps = Punctuation, open
- ucClosePunctuation, // Pe = Punctuation, close
- ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
- ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
- ucOtherPunctuation, // Po = Punctuation, other
- ucMathSymbol, // Sm = Symbol, math
- ucCurrencySymbol, // Sc = Symbol, currency
- ucModifierSymbol, // Sk = Symbol, modifier
- ucOtherSymbol, // So = Symbol, other
- ucSpaceSeparator, // Zs = Separator, space
- ucLineSeparator, // Zl = Separator, line
- ucParagraphSeparator, // Zp = Separator, paragraph
- ucControl, // Cc = Other, control
- ucFormat, // Cf = Other, format
- ucSurrogate, // Cs = Other, surrogate
- ucPrivateUse, // Co = Other, private use
- ucUnassigned // Cn = Other, not assigned (including noncharacters)
- );
- TUInt24Rec = packed record
- public
- {$ifdef FPC_LITTLE_ENDIAN}
- byte0, byte1, byte2 : Byte;
- {$else FPC_LITTLE_ENDIAN}
- byte2, byte1, byte0 : Byte;
- {$endif FPC_LITTLE_ENDIAN}
- public
- class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- end;
- UInt24 = TUInt24Rec;
- PUInt24 = ^UInt24;
- TUnicodeCodePoint = Cardinal;
- TUnicodeCodePointArray = array of TUnicodeCodePoint;
- TDecompositionArray = array of TUnicodeCodePointArray;
- TNumericValue = Double;
- TNumericValueArray = array of TNumericValue;
- TBlockItemRec = packed record
- RangeStart : TUnicodeCodePoint;
- RangeEnd : TUnicodeCodePoint;
- Name : string[120];
- CanonicalName : string[120];
- end;
- TBlocks = array of TBlockItemRec;
- PPropRec = ^TPropRec;
- { TPropRec }
- TPropRec = packed record
- private
- const FLAG_WHITE_SPACE = 0;
- const FLAG_HANGUL_SYLLABLE = 1;
- const FLAG_UNIFIED_IDEOGRAPH = 2;
- private
- function GetCategory : TUnicodeCategory;inline;
- procedure SetCategory(AValue : TUnicodeCategory);
- function GetWhiteSpace : Boolean;inline;
- procedure SetWhiteSpace(AValue : Boolean);
- function GetHangulSyllable : Boolean;inline;
- procedure SetHangulSyllable(AValue : Boolean);
- function GetUnifiedIdeograph : Boolean;inline;
- procedure SetUnifiedIdeograph(AValue : Boolean);
- public
- CategoryData : Byte;
- PropID : Word;
- CCC : Byte; // Canonical Combining Class
- NumericIndex : Byte;
- SimpleUpperCase : UInt24;
- SimpleLowerCase : UInt24;
- DecompositionID : SmallInt;
- public
- property Category : TUnicodeCategory read GetCategory write SetCategory;
- property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
- property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
- property UnifiedIdeograph : Boolean read GetUnifiedIdeograph write SetUnifiedIdeograph;
- end;
- TPropRecArray = array of TPropRec;
- TDecompositionIndexRec = packed record
- StartPosition : Word;
- Length : Byte;
- end;
- TDecompositionBook = X_PACKED record
- Index : array of TDecompositionIndexRec;
- CodePoints : array of TUnicodeCodePoint;
- end;
- PDataLineRec = ^TDataLineRec;
- TDataLineRec = record
- PropID : Integer;
- case LineType : Byte of
- 0 : (CodePoint : TUnicodeCodePoint);
- 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
- end;
- TDataLineRecArray = array of TDataLineRec;
- TCodePointRec = record
- case LineType : Byte of
- 0 : (CodePoint : TUnicodeCodePoint);
- 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
- end;
- TCodePointRecArray = array of TCodePointRec;
- TPropListLineRec = packed record
- CodePoint : TCodePointRec;
- PropName : string[123];
- end;
- TPropListLineRecArray = array of TPropListLineRec;
- { TUCA_WeightRec }
- TUCA_WeightRec = packed record
- public
- Weights : array[0..3] of Cardinal;
- Variable : Boolean;
- public
- class operator Equal(a, b: TUCA_WeightRec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
- end;
- TUCA_WeightRecArray = array of TUCA_WeightRec;
- PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
- TUCA_LineContextItemRec = X_PACKED record
- public
- CodePoints : TUnicodeCodePointArray;
- Weights : TUCA_WeightRecArray;
- public
- procedure Clear();
- procedure Assign(ASource : PUCA_LineContextItemRec);
- function Clone() : TUCA_LineContextItemRec;
- end;
- PUCA_LineContextRec = ^TUCA_LineContextRec;
- TUCA_LineContextRec = X_PACKED record
- public
- Data : array of TUCA_LineContextItemRec;
- public
- procedure Clear();
- procedure Assign(ASource : PUCA_LineContextRec);
- function Clone() : TUCA_LineContextRec;
- end;
- PUCA_LineRec = ^TUCA_LineRec;
- TUCA_LineRec = X_PACKED record
- public
- CodePoints : TUnicodeCodePointArray;
- Weights : TUCA_WeightRecArray;
- Context : TUCA_LineContextRec;
- //Variable : Boolean;
- Deleted : Boolean;
- Stored : Boolean;
- public
- procedure Clear();
- procedure Assign(ASource : PUCA_LineRec);
- function Clone() : TUCA_LineRec;
- function HasContext() : Boolean;
- end;
- TUCA_VariableKind = (
- ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
- ucaIgnoreSP
- );
- TUCA_DataBook = X_PACKED record
- Version : string;
- VariableWeight : TUCA_VariableKind;
- Backwards : array[0..3] of Boolean;
- Lines : array of TUCA_LineRec;
- end;
- PUCA_DataBook = ^TUCA_DataBook;
- TUCA_DataBookIndex = array of Integer;
- type
- TUCA_PropWeights = packed record
- Weights : array[0..2] of Word;
- //Variable : Byte;
- end;
- PUCA_PropWeights = ^TUCA_PropWeights;
- TUCA_PropItemContextRec = packed record
- CodePointCount : Byte;
- WeightCount : Byte;
- //CodePoints : UInt24;
- //Weights : TUCA_PropWeights;
- end;
- PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
- TUCA_PropItemContextTreeNodeRec = packed record
- Left : Word;
- Right : Word;
- Data : TUCA_PropItemContextRec;
- end;
- PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
- TUCA_PropItemContextTreeRec = packed record
- public
- Size : UInt24;
- public
- function GetData:PUCA_PropItemContextTreeNodeRec;inline;
- property Data : PUCA_PropItemContextTreeNodeRec read GetData;
- end;
- PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
- { TUCA_PropItemRec }
- TUCA_PropItemRec = packed record
- private
- const FLAG_VALID = 0;
- const FLAG_CODEPOINT = 1;
- const FLAG_CONTEXTUAL = 2;
- const FLAG_DELETION = 3;
- const FLAG_COMPRESS_WEIGHT_1 = 6;
- const FLAG_COMPRESS_WEIGHT_2 = 7;
- private
- function GetWeightSize : Word;inline;
- public
- WeightLength : Byte;
- ChildCount : Byte;
- Size : Word;
- Flags : Byte;
- public
- function HasCodePoint() : Boolean;inline;
- function GetCodePoint() : UInt24;//inline;
- property CodePoint : UInt24 read GetCodePoint;
- //Weights : array[0..WeightLength] of TUCA_PropWeights;
- procedure GetWeightArray(ADest : PUCA_PropWeights);
- function GetSelfOnlySize() : Cardinal;inline;
- procedure SetContextual(AValue : Boolean);inline;
- function GetContextual() : Boolean;inline;
- property Contextual : Boolean read GetContextual write setContextual;
- function GetContext() : PUCA_PropItemContextTreeRec;
- procedure SetDeleted(AValue : Boolean);inline;
- function IsDeleted() : Boolean;inline;
- function IsValid() : Boolean;inline;
- function IsWeightCompress_1() : Boolean;inline;
- function IsWeightCompress_2() : Boolean;inline;
- end;
- PUCA_PropItemRec = ^TUCA_PropItemRec;
- TUCA_PropIndexItem = packed record
- CodePoint : Cardinal;
- Position : Integer;
- end;
- PUCA_PropIndexItem = ^TUCA_PropIndexItem;
- TUCA_PropBook = X_PACKED record
- ItemSize : Integer;
- Index : array of TUCA_PropIndexItem;
- Items : PUCA_PropItemRec; //Native Endian
- ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian
- VariableLowLimit : Word;
- VariableHighLimit : Word;
- end;
- PUCA_PropBook = ^TUCA_PropBook;
- TBmpFirstTable = array[0..255] of Byte;
- TBmpSecondTableItem = array[0..255] of Word;
- TBmpSecondTable = array of TBmpSecondTableItem;
- T3lvlBmp1Table = array[0..255] of Byte;
- T3lvlBmp2TableItem = array[0..15] of Word;
- T3lvlBmp2Table = array of T3lvlBmp2TableItem;
- T3lvlBmp3TableItem = array[0..15] of Word;
- T3lvlBmp3Table = array of T3lvlBmp3TableItem;
- TucaBmpFirstTable = array[0..255] of Byte;
- TucaBmpSecondTableItem = array[0..255] of Cardinal;
- TucaBmpSecondTable = array of TucaBmpSecondTableItem;
- PucaBmpFirstTable = ^TucaBmpFirstTable;
- PucaBmpSecondTable = ^TucaBmpSecondTable;
- const
- LOW_SURROGATE_BEGIN = Word($DC00);
- LOW_SURROGATE_END = Word($DFFF);
- LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
- HIGH_SURROGATE_BEGIN = Word($D800);
- HIGH_SURROGATE_END = Word($DBFF);
- HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
- type
- TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
- TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
- TOBmpSecondTable = array of TOBmpSecondTableItem;
- T3lvlOBmp1Table = array[0..1023] of Byte;
- T3lvlOBmp2TableItem = array[0..31] of Word;
- T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
- T3lvlOBmp3TableItem = array[0..31] of Word;
- T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
- TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
- TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
- TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
- PucaOBmpFirstTable = ^TucaOBmpFirstTable;
- PucaOBmpSecondTable = ^TucaOBmpSecondTable;
- type
- TEndianKind = (ekLittle, ekBig);
- const
- ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
- {$IFDEF ENDIAN_LITTLE}
- ENDIAN_NATIVE = ekLittle;
- ENDIAN_NON_NATIVE = ekBig;
- {$ENDIF ENDIAN_LITTLE}
- {$IFDEF ENDIAN_BIG}
- ENDIAN_NATIVE = ekBig;
- ENDIAN_NON_NATIVE = ekLittle;
- {$ENDIF ENDIAN_BIG}
- procedure GenerateLicenceText(ADest : TStream);
- function BoolToByte(AValue : Boolean): Byte;inline;
- function IsHangulSyllable(
- const ACodePoint : TUnicodeCodePoint;
- const AHangulList : TCodePointRecArray
- ) : Boolean;
- procedure ParseHangulSyllableTypes(
- ADataAStream : TMemoryStream;
- var ACodePointList : TCodePointRecArray
- );
- procedure ParseProps(
- ADataAStream : TMemoryStream;
- var APropList : TPropListLineRecArray
- );
- function FindCodePointsByProperty(
- const APropName : string;
- const APropList : TPropListLineRecArray
- ) : TCodePointRecArray;
- procedure ParseBlokcs(
- ADataAStream : TMemoryStream;
- var ABlocks : TBlocks
- );
- procedure ParseUCAFile(
- ADataAStream : TMemoryStream;
- var ABook : TUCA_DataBook
- );
- procedure MakeUCA_Props(
- ABook : PUCA_DataBook;
- out AProps : PUCA_PropBook
- );
- procedure FreeUcaBook(var ABook : PUCA_PropBook);
- procedure MakeUCA_BmpTables(
- var AFirstTable : TucaBmpFirstTable;
- var ASecondTable : TucaBmpSecondTable;
- const APropBook : PUCA_PropBook
- );
- procedure MakeUCA_OBmpTables(
- var AFirstTable : TucaOBmpFirstTable;
- var ASecondTable : TucaOBmpSecondTable;
- const APropBook : PUCA_PropBook
- );
- function GetPropPosition(
- const AHighS,
- ALowS : Word;
- const AFirstTable : PucaOBmpFirstTable;
- const ASecondTable : PucaOBmpSecondTable
- ): Integer;inline;overload;
- procedure GenerateUCA_Head(
- ADest : TStream;
- ABook : PUCA_DataBook;
- AProps : PUCA_PropBook
- );
- procedure GenerateUCA_BmpTables(
- AStream,
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaBmpFirstTable;
- var ASecondTable : TucaBmpSecondTable
- );
- procedure GenerateBinaryUCA_BmpTables(
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaBmpFirstTable;
- var ASecondTable : TucaBmpSecondTable
- );
- procedure GenerateUCA_PropTable(
- ADest : TStream;
- const APropBook : PUCA_PropBook;
- const AEndian : TEndianKind
- );
- procedure GenerateBinaryUCA_PropTable(
- // WARNING : files must be generated for each endianess (Little / Big)
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- const APropBook : PUCA_PropBook
- );
- procedure GenerateUCA_OBmpTables(
- AStream,
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaOBmpFirstTable;
- var ASecondTable : TucaOBmpSecondTable
- );
- procedure GenerateBinaryUCA_OBmpTables(
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaOBmpFirstTable;
- var ASecondTable : TucaOBmpSecondTable
- );
- procedure Parse_UnicodeData(
- ADataAStream : TMemoryStream;
- var APropList : TPropRecArray;
- var ANumericTable : TNumericValueArray;
- var ADataLineList : TDataLineRecArray;
- var ADecomposition : TDecompositionArray;
- const AHangulList : TCodePointRecArray;
- const AWhiteSpaces : TCodePointRecArray;
- const AUnifiedIdeographs : TCodePointRecArray
- );
- procedure MakeDecomposition(
- const ARawData : TDecompositionArray;
- var ABook : TDecompositionBook
- );
- procedure MakeBmpTables(
- var AFirstTable : TBmpFirstTable;
- var ASecondTable : TBmpSecondTable;
- const ADataLineList : TDataLineRecArray
- );
- procedure MakeBmpTables3Levels(
- var AFirstTable : T3lvlBmp1Table;
- var ASecondTable : T3lvlBmp2Table;
- var AThirdTable : T3lvlBmp3Table;
- const ADataLineList : TDataLineRecArray
- );
- procedure GenerateBmpTables(
- ADest : TStream;
- var AFirstTable : TBmpFirstTable;
- var ASecondTable : TBmpSecondTable
- );
- procedure Generate3lvlBmpTables(
- ADest : TStream;
- var AFirstTable : T3lvlBmp1Table;
- var ASecondTable : T3lvlBmp2Table;
- var AThirdTable : T3lvlBmp3Table
- );
- procedure GeneratePropTable(
- ADest : TStream;
- const APropList : TPropRecArray;
- const AEndian : TEndianKind
- );
- procedure GenerateNumericTable(
- ADest : TStream;
- const ANumList : TNumericValueArray;
- const ACompleteUnit : Boolean
- );
- procedure GenerateDecompositionBookTable(
- ADest : TStream;
- const ABook : TDecompositionBook;
- const AEndian : TEndianKind
- );
- procedure GenerateOutBmpTable(
- ADest : TStream;
- const AList : TDataLineRecArray
- );
- function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
- function EvaluateFloat(const AStr : string) : Double;
- function StrToCategory(const AStr : string) : TUnicodeCategory;
- function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
- function IsWhiteSpace(
- const ACodePoint : TUnicodeCodePoint;
- const AWhiteSpaces : TCodePointRecArray
- ) : Boolean;inline;
- function IsIncluded(
- const ACodePoint : TUnicodeCodePoint;
- const AList : TCodePointRecArray
- ) : Boolean;
- function GetPropID(
- ACodePoint : TUnicodeCodePoint;
- const ADataLineList : TDataLineRecArray
- ) : Cardinal;
- //--------------------
- procedure MakeOBmpTables(
- var AFirstTable : TOBmpFirstTable;
- var ASecondTable : TOBmpSecondTable;
- const ADataLineList : TDataLineRecArray
- );
- procedure MakeOBmpTables3Levels(
- var AFirstTable : T3lvlOBmp1Table;
- var ASecondTable : T3lvlOBmp2Table;
- var AThirdTable : T3lvlOBmp3Table;
- const ADataLineList : TDataLineRecArray
- );
- procedure GenerateOBmpTables(
- ADest : TStream;
- var AFirstTable : TOBmpFirstTable;
- var ASecondTable : TOBmpSecondTable
- );
- procedure Generate3lvlOBmpTables(
- ADest : TStream;
- var AFirstTable : T3lvlOBmp1Table;
- var ASecondTable : T3lvlOBmp2Table;
- var AThirdTable : T3lvlOBmp3Table
- );
- function GetProp(
- const AHighS,
- ALowS : Word;
- const AProps : TPropRecArray;
- var AFirstTable : TOBmpFirstTable;
- var ASecondTable : TOBmpSecondTable
- ): PPropRec; inline;overload;
- function GetProp(
- const AHighS,
- ALowS : Word;
- const AProps : TPropRecArray;
- var AFirstTable : T3lvlOBmp1Table;
- var ASecondTable : T3lvlOBmp2Table;
- var AThirdTable : T3lvlOBmp3Table
- ): PPropRec; inline;overload;
- procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
- function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
- type
- TBitOrder = 0..7;
- function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
- procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
- function GenerateEndianIncludeFileName(
- const AStoreName : string;
- const AEndian : TEndianKind
- ): string;inline;
- procedure ReverseFromNativeEndian(
- const AData : PUCA_PropItemRec;
- const ADataLen : Cardinal;
- const ADest : PUCA_PropItemRec
- );
- procedure ReverseToNativeEndian(
- const AData : PUCA_PropItemRec;
- const ADataLen : Cardinal;
- const ADest : PUCA_PropItemRec
- );
- procedure CompareProps(
- const AProp1,
- AProp2 : PUCA_PropItemRec;
- const ADataLen : Integer
- );
- type
- TCollationName = array[0..(128-1)] of Byte;
- TCollationVersion = TCollationName;
- TSerializedCollationHeader = packed record
- Base : TCollationName;
- Version : TCollationVersion;
- CollationName : TCollationName;
- CollationAliases : TCollationName; // ";" separated
- VariableWeight : Byte;
- Backwards : Byte;
- BMP_Table1Length : DWord;
- BMP_Table2Length : DWord;
- OBMP_Table1Length : DWord;
- OBMP_Table2Length : DWord;
- PropCount : DWord;
- VariableLowLimit : Word;
- VariableHighLimit : Word;
- NoNormalization : Byte;
- Strength : Byte;
- ChangedFields : Byte;
- end;
- PSerializedCollationHeader = ^TSerializedCollationHeader;
- procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);overload;
- procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);overload;
- procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
- procedure ReverseBytes(var AData; const ALength : Integer);
- procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
- function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
- procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
- function RewriteLevel2(
- const ALevel1Value : Cardinal;
- ALines : PUCA_LineRec;
- const ALinesLength : Integer
- ) : Integer;
- resourcestring
- SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
- implementation
- uses
- typinfo, Math, AVL_Tree,
- trie;
- type
- TCardinalRec = packed record
- {$ifdef FPC_LITTLE_ENDIAN}
- byte0, byte1, byte2, byte3 : Byte;
- {$else FPC_LITTLE_ENDIAN}
- byte3, byte2, byte1, byte0 : Byte;
- {$endif FPC_LITTLE_ENDIAN}
- end;
- TWordRec = packed record
- {$ifdef FPC_LITTLE_ENDIAN}
- byte0, byte1 : Byte;
- {$else FPC_LITTLE_ENDIAN}
- byte1, byte0 : Byte;
- {$endif FPC_LITTLE_ENDIAN}
- end;
- { TUInt24Rec }
- class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
- begin
- TCardinalRec(Result).byte0 := a.byte0;
- TCardinalRec(Result).byte1 := a.byte1;
- TCardinalRec(Result).byte2 := a.byte2;
- TCardinalRec(Result).byte3 := 0;
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
- begin
- TCardinalRec(Result).byte0 := a.byte0;
- TCardinalRec(Result).byte1 := a.byte1;
- TCardinalRec(Result).byte2 := a.byte2;
- TCardinalRec(Result).byte3 := 0;
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
- begin
- Result := Cardinal(a);
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
- begin
- {$IFOPT R+}
- if (a.byte2 > 0) then
- Error(reIntOverflow);
- {$ENDIF R+}
- TWordRec(Result).byte0 := a.byte0;
- TWordRec(Result).byte1 := a.byte1;
- end;
- class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
- begin
- {$IFOPT R+}
- if (a.byte1 > 0) or (a.byte2 > 0) then
- Error(reIntOverflow);
- {$ENDIF R+}
- Result := a.byte0;
- end;
- class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
- begin
- {$IFOPT R+}
- if (a > $FFFFFF) then
- Error(reIntOverflow);
- {$ENDIF R+}
- Result.byte0 := TCardinalRec(a).byte0;
- Result.byte1 := TCardinalRec(a).byte1;
- Result.byte2 := TCardinalRec(a).byte2;
- end;
- class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
- begin
- Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
- begin
- Result := (TCardinalRec(b).byte3 = 0) and
- (a.byte0 = TCardinalRec(b).byte0) and
- (a.byte1 = TCardinalRec(b).byte1) and
- (a.byte2 = TCardinalRec(b).byte2);
- end;
- class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
- begin
- Result := (LongInt(a) = b);
- end;
- class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
- begin
- Result := (a.byte2 = 0) and
- (a.byte0 = TWordRec(b).byte0) and
- (a.byte1 = TWordRec(b).byte1);
- end;
- class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
- begin
- Result := (a.byte2 = 0) and
- (a.byte1 = 0) and
- (a.byte0 = b);
- end;
- class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
- begin
- Result := (b = a);
- end;
- class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
- begin
- Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
- end;
- class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
- begin
- Result := (TCardinalRec(b).byte3 <> 0) or
- (a.byte0 <> TCardinalRec(b).byte0) or
- (a.byte1 <> TCardinalRec(b).byte1) or
- (a.byte2 <> TCardinalRec(b).byte2);
- end;
- class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
- begin
- Result := (b <> a);
- end;
- class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
- begin
- Result := (a.byte2 > b.byte2) or
- ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
- ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
- end;
- class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) > b;
- end;
- class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a > Cardinal(b);
- end;
- class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
- begin
- Result := (a.byte2 > b.byte2) or
- ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
- ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
- end;
- class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) >= b;
- end;
- class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a >= Cardinal(b);
- end;
- class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
- begin
- Result := (b > a);
- end;
- class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) < b;
- end;
- class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a < Cardinal(b);
- end;
- class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
- begin
- Result := (b >= a);
- end;
- class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
- begin
- Result := Cardinal(a) <= b;
- end;
- class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
- begin
- Result := a <= Cardinal(b);
- end;
- { TUCA_WeightRec }
- class operator TUCA_WeightRec.Equal(a, b : TUCA_WeightRec) : Boolean;
- begin
- Result := (a.Weights[0] = b.Weights[0]) and (a.Weights[1] = b.Weights[1]) and
- (a.Weights[2] = b.Weights[2]) and (a.Weights[3] = b.Weights[3]) and
- (a.Variable = b.Variable);
- end;
- procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);
- begin
- StringToByteArray(AStr,@(ABuffer[Low(ABuffer)]),Length(ABuffer));
- end;
- procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);
- var
- c, i, bl : Integer;
- ps : PWord;
- pb : PByte;
- begin
- if (ABufferLength < 1) then
- exit;
- c := Length(AStr);
- if (c > ABufferLength) then
- c := ABufferLength;
- bl := 0;
- pb := ABuffer;
- if (c > 0) then begin
- ps := PWord(@AStr[1]);
- for i := 1 to c do begin
- if (ps^ <= High(Byte)) then begin
- pb^ := ps^;
- bl := bl+1;
- Inc(pb);
- end;
- Inc(ps);
- end;
- end;
- if (bl < ABufferLength) then begin
- for i := bl+1 to ABufferLength do begin
- pb^:= 0;
- Inc(pb);
- end;
- end;
- end;
- function GenerateEndianIncludeFileName(
- const AStoreName : string;
- const AEndian : TEndianKind
- ): string;inline;
- begin
- Result := ExtractFilePath(AStoreName) +
- ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
- end;
- function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
- begin
- Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
- end;
- procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
- begin
- if AValue then
- AData := AData or (1 shl (ABit mod 8))
- else
- AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
- end;
- var
- FS : TFormatSettings;
- function EvaluateFloat(const AStr : string) : Double;
- var
- s, n, d : string;
- i : Integer;
- begin
- Result := 0;
- s := Trim(AStr);
- if (Length(s) > 0) then begin
- i := Pos('/',s);
- if (i < 1) then
- Result := StrToFloat(s,FS)
- else begin
- n := Copy(s,1,i-1);
- d := Copy(s,i+1,MaxInt);
- Result := StrToInt(n) / StrToInt(d);
- end;
- end;
- end;
- function StrToCategory(const AStr : string) : TUnicodeCategory;
- var
- s : string;
- begin
- s := UpperCase(Trim(AStr));
- if (s = 'LU') then
- Result := ucUppercaseLetter
- else if (s = 'LL') then
- Result := ucLowercaseLetter
- else if (s = 'LT') then
- Result := ucTitlecaseLetter
- else if (s = 'LM') then
- Result := ucModifierLetter
- else if (s = 'LO') then
- Result := ucOtherLetter
- else
- if (s = 'MN') then
- Result := ucNonSpacingMark
- else if (s = 'MC') then
- Result := ucCombiningMark
- else if (s = 'ME') then
- Result := ucEnclosingMark
- else
- if (s = 'ND') then
- Result := ucDecimalNumber
- else if (s = 'NL') then
- Result := ucLetterNumber
- else if (s = 'NO') then
- Result := ucOtherNumber
- else
- if (s = 'PC') then
- Result := ucConnectPunctuation
- else if (s = 'PD') then
- Result := ucDashPunctuation
- else if (s = 'PS') then
- Result := ucOpenPunctuation
- else if (s = 'PE') then
- Result := ucClosePunctuation
- else if (s = 'PI') then
- Result := ucInitialPunctuation
- else if (s = 'PF') then
- Result := ucFinalPunctuation
- else if (s = 'PO') then
- Result := ucOtherPunctuation
- else
- if (s = 'SM') then
- Result := ucMathSymbol
- else if (s = 'SC') then
- Result := ucCurrencySymbol
- else if (s = 'SK') then
- Result := ucModifierSymbol
- else if (s = 'SO') then
- Result := ucOtherSymbol
- else
- if (s = 'ZS') then
- Result := ucSpaceSeparator
- else if (s = 'ZL') then
- Result := ucLineSeparator
- else if (s = 'ZP') then
- Result := ucParagraphSeparator
- else
- if (s = 'CC') then
- Result := ucControl
- else if (s = 'CF') then
- Result := ucFormat
- else if (s = 'CS') then
- Result := ucSurrogate
- else if (s = 'CO') then
- Result := ucPrivateUse
- else
- Result := ucUnassigned;
- end;
- function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
- var
- s : string;
- begin
- s := Trim(ACP);
- Result := 0;
- if (Length(s) > 0) and (s <> '#') then
- Result := StrToInt('$' + s);
- end;
- function IsIncluded(
- const ACodePoint : TUnicodeCodePoint;
- const AList : TCodePointRecArray
- ) : Boolean;
- var
- i : Integer;
- p : ^TCodePointRec;
- begin
- Result := False;
- p := @AList[Low(AList)];
- for i := Low(AList) to High(AList) do begin
- if (p^.LineType = 0) then begin
- if (p^.CodePoint = ACodePoint) then begin
- Result := True;
- break;
- end;
- end else begin
- if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then begin
- Result := True;
- break;
- end;
- end;
- Inc(p);
- end;
- end;
- {function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
- begin
- case ACodePoint of
- $0009..$000D : Result := True;// White_Space # Cc [5] <control-0009>..<control-000D>
- $0020 : Result := True;// White_Space # Zs SPACE
- $0085 : Result := True;// White_Space # Cc <control-0085>
- $00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE
- $1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK
- $180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR
- $2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE
- $2028 : Result := True;// White_Space # Zl LINE SEPARATOR
- $2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR
- $202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE
- $205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE
- $3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE
- else
- Result := False;
- end;
- end;}
- function IsWhiteSpace(
- const ACodePoint : TUnicodeCodePoint;
- const AWhiteSpaces : TCodePointRecArray
- ) : Boolean;
- begin
- Result := IsIncluded(ACodePoint,AWhiteSpaces);
- end;
- function NormalizeBlockName(const AName : string) : string;
- var
- i, c, k : Integer;
- s : string;
- begin
- c := Length(AName);
- SetLength(Result,c);
- s := LowerCase(AName);
- k := 0;
- for i := 1 to c do begin
- if (s[1] in ['a'..'z','0'..'9','-']) then begin
- k := k + 1;
- Result[k] := s[i];
- end;
- end;
- SetLength(Result,k);
- end;
- procedure ParseBlokcs(
- ADataAStream : TMemoryStream;
- var ABlocks : TBlocks
- );
- const
- LINE_LENGTH = 1024;
- DATA_LENGTH = 25000;
- var
- p : PAnsiChar;
- actualDataLen : Integer;
- bufferLength, bufferPos, lineLength, linePos : Integer;
- line : ansistring;
- function NextLine() : Boolean;
- var
- locOldPos : Integer;
- locOldPointer : PAnsiChar;
- begin
- Result := False;
- locOldPointer := p;
- locOldPos := bufferPos;
- while (bufferPos < bufferLength) and (p^ <> #10) do begin
- Inc(p);
- Inc(bufferPos);
- end;
- if (locOldPos = bufferPos) and (p^ = #10) then begin
- lineLength := 0;
- Inc(p);
- Inc(bufferPos);
- linePos := 1;
- Result := True;
- end else if (locOldPos < bufferPos) then begin
- lineLength := (bufferPos - locOldPos);
- Move(locOldPointer^,line[1],lineLength);
- if (p^ = #10) then begin
- Dec(lineLength);
- Inc(p);
- Inc(bufferPos);
- end;
- linePos := 1;
- Result := True;
- end;
- end;
- function NextToken() : ansistring;
- var
- k : Integer;
- begin
- k := linePos;
- if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
- Inc(linePos);
- Result := Copy(line,k,(linePos-k));
- exit;
- end;
- while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
- Inc(linePos);
- if (linePos > k) then begin
- if (line[linePos] in [';','#','.']) then
- Result := Copy(line,k,(linePos-k))
- else
- Result := Copy(line,k,(linePos-k+1));
- Result := Trim(Result);
- end else begin
- Result := '';
- end;
- end;
- procedure ParseLine();
- var
- locData : TBlockItemRec;
- s : ansistring;
- begin
- s := NextToken();
- if (s = '') or (s[1] = '#') then
- exit;
- locData.RangeStart := StrToInt('$'+s);
- s := NextToken();
- if (s <> '.') then
- raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
- s := NextToken();
- if (s <> '.') then
- raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
- s := NextToken();
- locData.RangeEnd := StrToInt('$'+s);
- s := NextToken();
- if (s <> ';') then
- raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
- locData.Name := Trim(NextToken());
- locData.CanonicalName := NormalizeBlockName(locData.Name);
- if (Length(ABlocks) <= actualDataLen) then
- SetLength(ABlocks,Length(ABlocks)*2);
- ABlocks[actualDataLen] := locData;
- Inc(actualDataLen);
- end;
- procedure Prepare();
- begin
- SetLength(ABlocks,DATA_LENGTH);
- actualDataLen := 0;
- bufferLength := ADataAStream.Size;
- bufferPos := 0;
- p := ADataAStream.Memory;
- lineLength := 0;
- SetLength(line,LINE_LENGTH);
- end;
- begin
- Prepare();
- while NextLine() do
- ParseLine();
- SetLength(ABlocks,actualDataLen);
- end;
- procedure ParseProps(
- ADataAStream : TMemoryStream;
- var APropList : TPropListLineRecArray
- );
- const
- LINE_LENGTH = 1024;
- DATA_LENGTH = 25000;
- var
- p : PAnsiChar;
- actualDataLen : Integer;
- bufferLength, bufferPos, lineLength, linePos : Integer;
- line : ansistring;
- function NextLine() : Boolean;
- var
- locOldPos : Integer;
- locOldPointer : PAnsiChar;
- begin
- Result := False;
- locOldPointer := p;
- locOldPos := bufferPos;
- while (bufferPos < bufferLength) and (p^ <> #10) do begin
- Inc(p);
- Inc(bufferPos);
- end;
- if (locOldPos = bufferPos) and (p^ = #10) then begin
- lineLength := 0;
- Inc(p);
- Inc(bufferPos);
- linePos := 1;
- Result := True;
- end else if (locOldPos < bufferPos) then begin
- lineLength := (bufferPos - locOldPos);
- Move(locOldPointer^,line[1],lineLength);
- if (p^ = #10) then begin
- Dec(lineLength);
- Inc(p);
- Inc(bufferPos);
- end;
- linePos := 1;
- Result := True;
- end;
- end;
- function NextToken() : ansistring;
- var
- k : Integer;
- begin
- k := linePos;
- if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
- Inc(linePos);
- Result := Copy(line,k,(linePos-k));
- exit;
- end;
- while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
- Inc(linePos);
- if (linePos > k) then begin
- if (line[linePos] in [';','#','.']) then
- Result := Copy(line,k,(linePos-k))
- else
- Result := Copy(line,k,(linePos-k+1));
- Result := Trim(Result);
- end else begin
- Result := '';
- end;
- end;
- procedure ParseLine();
- var
- locCP : Cardinal;
- locData : TPropListLineRec;
- s : ansistring;
- begin
- s := NextToken();
- if (s = '') or (s[1] = '#') then
- exit;
- locCP := StrToInt('$'+s);
- s := NextToken();
- if (s = ';') then begin
- locData.CodePoint.LineType := 0;
- locData.CodePoint.CodePoint := locCP;
- end else begin
- if (s = '') or (s <> '.') or (NextToken() <> '.') then
- raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
- locData.CodePoint.LineType := 1;
- locData.CodePoint.StartCodePoint := locCP;
- locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
- s := NextToken();
- if (s <> ';') then
- raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
- end;
- locData.PropName := Trim(NextToken());
- if (Length(APropList) <= actualDataLen) then
- SetLength(APropList,Length(APropList)*2);
- APropList[actualDataLen] := locData;
- Inc(actualDataLen);
- end;
- procedure Prepare();
- begin
- SetLength(APropList,DATA_LENGTH);
- actualDataLen := 0;
- bufferLength := ADataAStream.Size;
- bufferPos := 0;
- p := ADataAStream.Memory;
- lineLength := 0;
- SetLength(line,LINE_LENGTH);
- end;
- begin
- Prepare();
- while NextLine() do
- ParseLine();
- SetLength(APropList,actualDataLen);
- end;
- function FindCodePointsByProperty(
- const APropName : string;
- const APropList : TPropListLineRecArray
- ) : TCodePointRecArray;
- var
- r : TCodePointRecArray;
- i, k : Integer;
- s : string;
- begin
- k := 0;
- r := nil;
- s := LowerCase(Trim(APropName));
- for i := Low(APropList) to High(APropList) do begin
- if (LowerCase(APropList[i].PropName) = s) then begin
- if (k >= Length(r)) then begin
- if (k = 0) then
- SetLength(r,24)
- else
- SetLength(r,(2*Length(r)));
- end;
- r[k] := APropList[i].CodePoint;
- Inc(k);
- end;
- end;
- SetLength(r,k);
- Result := r;
- end;
- procedure ParseHangulSyllableTypes(
- ADataAStream : TMemoryStream;
- var ACodePointList : TCodePointRecArray
- );
- const
- LINE_LENGTH = 1024;
- DATA_LENGTH = 25000;
- var
- p : PAnsiChar;
- actualDataLen : Integer;
- bufferLength, bufferPos, lineLength, linePos : Integer;
- line : ansistring;
- function NextLine() : Boolean;
- var
- locOldPos : Integer;
- locOldPointer : PAnsiChar;
- begin
- Result := False;
- locOldPointer := p;
- locOldPos := bufferPos;
- while (bufferPos < bufferLength) and (p^ <> #10) do begin
- Inc(p);
- Inc(bufferPos);
- end;
- if (locOldPos = bufferPos) and (p^ = #10) then begin
- lineLength := 0;
- Inc(p);
- Inc(bufferPos);
- linePos := 1;
- Result := True;
- end else if (locOldPos < bufferPos) then begin
- lineLength := (bufferPos - locOldPos);
- Move(locOldPointer^,line[1],lineLength);
- if (p^ = #10) then begin
- Dec(lineLength);
- Inc(p);
- Inc(bufferPos);
- end;
- linePos := 1;
- Result := True;
- end;
- end;
- function NextToken() : ansistring;
- var
- k : Integer;
- begin
- k := linePos;
- if (linePos < lineLength) and (line[linePos] = '.') then begin
- Inc(linePos);
- while (linePos < lineLength) and (line[linePos] = '.') do begin
- Inc(linePos);
- end;
- Result := Copy(line,k,(linePos-k));
- exit;
- end;
- while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
- Inc(linePos);
- if (linePos > k) then begin
- if (line[linePos] in [';','#','.']) then
- Result := Copy(line,k,(linePos-k))
- else
- Result := Copy(line,k,(linePos-k+1));
- Result := Trim(Result);
- end else begin
- Result := '';
- end;
- //Inc(linePos);
- end;
- procedure ParseLine();
- var
- locData : TCodePointRec;
- s : ansistring;
- begin
- s := NextToken();
- if (s = '') or (s[1] = '#') then
- exit;
- locData.CodePoint := StrToInt('$'+s);
- s := NextToken();
- if (s = '') or (s[1] in [';','#']) then begin
- locData.LineType := 0;
- end else begin
- if (s <> '..') then
- raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
- locData.StartCodePoint := locData.CodePoint;
- locData.EndCodePoint := StrToInt('$'+NextToken());
- locData.LineType := 1;
- end;
- if (Length(ACodePointList) <= actualDataLen) then
- SetLength(ACodePointList,Length(ACodePointList)*2);
- ACodePointList[actualDataLen] := locData;
- Inc(actualDataLen);
- end;
- procedure Prepare();
- begin
- SetLength(ACodePointList,DATA_LENGTH);
- actualDataLen := 0;
- bufferLength := ADataAStream.Size;
- bufferPos := 0;
- p := ADataAStream.Memory;
- lineLength := 0;
- SetLength(line,LINE_LENGTH);
- end;
- begin
- Prepare();
- while NextLine() do
- ParseLine();
- SetLength(ACodePointList,actualDataLen);
- end;
- function IsHangulSyllable(
- const ACodePoint : TUnicodeCodePoint;
- const AHangulList : TCodePointRecArray
- ) : Boolean;
- var
- i : Integer;
- p : ^TCodePointRec;
- begin
- Result := False;
- p := @AHangulList[Low(AHangulList)];
- for i := Low(AHangulList) to High(AHangulList) do begin
- if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
- ( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
- then begin
- Result := True;
- Break;
- end;
- Inc(p);
- end;
- end;
- function IndexOf(
- const AProp : TPropRec;
- const APropList : TPropRecArray;
- const AActualLen : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : PPropRec;
- begin
- Result := -1;
- if (AActualLen > 0) then begin
- p := @APropList[0];
- for i := 0 to AActualLen - 1 do begin
- if (AProp.Category = p^.Category) and
- (AProp.CCC = p^.CCC) and
- (AProp.NumericIndex = p^.NumericIndex) and
- (AProp.SimpleUpperCase = p^.SimpleUpperCase) and
- (AProp.SimpleLowerCase = p^.SimpleLowerCase) and
- (AProp.WhiteSpace = p^.WhiteSpace) and
- (AProp.UnifiedIdeograph = p^.UnifiedIdeograph) and
- //
- (AProp.DecompositionID = p^.DecompositionID) and
- (* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
- ( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
- *)
- (AProp.HangulSyllable = p^.HangulSyllable)
- then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- function IndexOf(
- const AItem : TUnicodeCodePointArray;
- const AList : TDecompositionArray
- ) : Integer;overload;
- var
- p : TUnicodeCodePointArray;
- i : Integer;
- begin
- Result := -1;
- if (Length(AList) = 0) then
- exit;
- for i := Low(AList) to High(AList) do begin
- p := AList[i];
- if (Length(p) = Length(AItem)) then begin
- if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
- exit(i);
- end;
- end;
- Result := -1;
- end;
- function IndexOf(
- const AItem : TNumericValue;
- const AList : TNumericValueArray;
- const AActualLen : Integer
- ) : Integer;overload;
- var
- p : ^TNumericValue;
- i : Integer;
- begin
- Result := -1;
- if (AActualLen = 0) then
- exit;
- p := @AList[Low(AList)];
- for i := Low(AList) to AActualLen - 1 do begin
- if (AItem = p^) then
- exit(i);
- Inc(p);
- end;
- Result := -1;
- end;
- procedure Parse_UnicodeData(
- ADataAStream : TMemoryStream;
- var APropList : TPropRecArray;
- var ANumericTable : TNumericValueArray;
- var ADataLineList : TDataLineRecArray;
- var ADecomposition : TDecompositionArray;
- const AHangulList : TCodePointRecArray;
- const AWhiteSpaces : TCodePointRecArray;
- const AUnifiedIdeographs : TCodePointRecArray
- );
- const
- LINE_LENGTH = 1024;
- PROP_LENGTH = 5000;
- DATA_LENGTH = 25000;
- var
- p : PAnsiChar;
- bufferLength, bufferPos : Integer;
- actualPropLen, actualDataLen, actualNumLen : Integer;
- line : ansistring;
- lineLength, linePos : Integer;
- function NextLine() : Boolean;
- var
- locOldPos : Integer;
- locOldPointer : PAnsiChar;
- begin
- Result := False;
- locOldPointer := p;
- locOldPos := bufferPos;
- while (bufferPos < bufferLength) and (p^ <> #10) do begin
- Inc(p);
- Inc(bufferPos);
- end;
- if (locOldPos < bufferPos) then begin
- lineLength := (bufferPos - locOldPos);
- Move(locOldPointer^,line[1],lineLength);
- if (p^ = #10) then begin
- Dec(lineLength);
- Inc(p);
- Inc(bufferPos);
- end;
- if (lineLength > 7) then begin
- linePos := 1;
- Result := True;
- end;
- end;
- end;
- function NextToken() : ansistring;
- var
- k : Integer;
- begin
- k := linePos;
- while (linePos < lineLength) and not(line[linePos] in [';','#']) do
- Inc(linePos);
- if (linePos > k) then begin
- if (line[linePos] in [';','#']) then
- Result := Copy(line,k,(linePos-k))
- else
- Result := Copy(line,k,(linePos-k+1));
- Result := Trim(Result);
- end else begin
- Result := '';
- end;
- Inc(linePos);
- end;
- function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
- var
- locStr, ks : ansistring;
- k0,k : Integer;
- begin
- SetLength(Result,0);
- locStr := UpperCase(Trim(AStr));
- if (locStr = '') or (locStr[1] = '<') then
- exit;
- k0 := 1;
- k := 1;
- while (k <= Length(locStr)) do begin
- while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
- inc(k);
- ks := Trim(Copy(locStr,k0,k-k0));
- SetLength(Result,Length(Result)+1);
- Result[Length(Result)-1] := StringToCodePoint(ks);
- Inc(k);
- k0 := k;
- end;
- end;
- procedure ParseLine();
- var
- locCP : TUnicodeCodePoint;
- locProp : TPropRec;
- locData : TDataLineRec;
- s : ansistring;
- locRangeStart, locRangeEnd : Boolean;
- k : Integer;
- locDecompItem : TUnicodeCodePointArray;
- numVal : TNumericValue;
- begin
- FillChar(locData,SizeOf(locData),#0);
- FillChar(locProp,SizeOf(locProp),#0);
- locCP := StrToInt('$'+NextToken());
- s := NextToken();
- locRangeStart := AnsiEndsText(', First>',s);
- if locRangeStart then
- locRangeEnd := False
- else
- locRangeEnd := AnsiEndsText(', Last>',s);
- if locRangeStart then begin
- locData.LineType := 1;
- locData.StartCodePoint := locCP;
- end else if locRangeEnd then begin
- ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
- exit;
- //locData.EndCodePoint := locCP;
- end else begin
- locData.LineType := 0;
- locData.CodePoint := locCP;
- end;
- locProp.Category := StrToCategory(NextToken());
- locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
- NextToken();//Bidi_Class
- s := NextToken();//Decomposition_Type
- locDecompItem := ParseCanonicalDecomposition(s);
- if (Length(locDecompItem) = 0) then
- locProp.DecompositionID := -1
- else begin
- locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
- if (locProp.DecompositionID = -1) then begin
- k := Length(ADecomposition);
- locProp.DecompositionID := k;
- SetLength(ADecomposition,k+1);
- ADecomposition[k] := locDecompItem;
- end;
- end;
- numVal := EvaluateFloat(NextToken());
- if (numVal <> Double(0.0)) then begin
- NextToken();
- NextToken();
- end else begin
- s := NextToken();
- if (s <> '') then
- numVal := EvaluateFloat(s);
- s := NextToken();
- if (numVal = Double(0.0)) then
- numVal := EvaluateFloat(s);
- end;
- k := IndexOf(numVal,ANumericTable,actualNumLen);
- if (k = -1) then begin
- if (actualNumLen >= Length(ANumericTable)) then
- SetLength(ANumericTable,(actualNumLen*2));
- ANumericTable[actualNumLen] := numVal;
- k := actualNumLen;
- Inc(actualNumLen);
- end;
- locProp.NumericIndex := k;
- NextToken();//Bidi_Mirroed
- NextToken();//Unicode_l_Name
- NextToken();//ISO_Comment
- locProp.SimpleUpperCase := StringToCodePoint(NextToken());
- locProp.SimpleLowerCase := StringToCodePoint(NextToken());
- NextToken();//Simple_Title_Case_Mapping
- locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
- locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
- locProp.UnifiedIdeograph := IsIncluded(locCP,AUnifiedIdeographs);
- k := IndexOf(locProp,APropList,actualPropLen);
- if (k = -1) then begin
- k := actualPropLen;
- locProp.PropID := k{ + 1};
- APropList[k] := locProp;
- Inc(actualPropLen);
- end;
- locData.PropID := k;
- if (actualDataLen >= Length(ADataLineList)) then
- SetLength(ADataLineList,(2*Length(ADataLineList)));
- ADataLineList[actualDataLen] := locData;
- Inc(actualDataLen);
- end;
- procedure Prepare();
- var
- r : TPropRec;
- begin
- SetLength(APropList,PROP_LENGTH);
- actualPropLen := 0;
- SetLength(ADataLineList,DATA_LENGTH);
- actualDataLen := 0;
- bufferLength := ADataAStream.Size;
- bufferPos := 0;
- p := ADataAStream.Memory;
- lineLength := 0;
- SetLength(line,LINE_LENGTH);
- SetLength(ANumericTable,500);
- actualNumLen := 0;
- FillChar(r,SizeOf(r),#0);
- r.PropID := 0;
- r.Category := ucUnassigned;
- r.DecompositionID := -1;
- r.NumericIndex := 0;
- APropList[0] := r;
- Inc(actualPropLen);
- ANumericTable[0] := 0;
- Inc(actualNumLen);
- end;
- begin
- Prepare();
- while NextLine() do
- ParseLine();
- SetLength(APropList,actualPropLen);
- SetLength(ADataLineList,actualDataLen);
- SetLength(ANumericTable,actualNumLen);
- end;
- function GetPropID(
- ACodePoint : TUnicodeCodePoint;
- const ADataLineList : TDataLineRecArray
- ) : Cardinal;
- var
- i : Integer;
- p : PDataLineRec;
- begin
- Result := 0;
- p := @ADataLineList[Low(ADataLineList)];
- for i := Low(ADataLineList) to High(ADataLineList) do begin
- if (p^.LineType = 0) then begin
- if (p^.CodePoint = ACodePoint) then begin
- Result := p^.PropID;
- Break;
- end;
- end else begin
- if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
- Result := p^.PropID;
- Break;
- end;
- end;
- Inc(p);
- end;
- end;
- procedure MakeDecomposition(
- const ARawData : TDecompositionArray;
- var ABook : TDecompositionBook
- );
- var
- i, c, locPos : Integer;
- locItem : TUnicodeCodePointArray;
- begin
- c := 0;
- for i := Low(ARawData) to High(ARawData) do
- c := c + Length(ARawData[i]);
- SetLength(ABook.CodePoints,c);
- SetLength(ABook.Index,Length(ARawData));
- locPos := 0;
- for i := Low(ARawData) to High(ARawData) do begin
- locItem := ARawData[i];
- ABook.Index[i].StartPosition := locPos;
- ABook.Index[i].Length := Length(locItem);
- Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
- locPos := locPos + Length(locItem);
- end;
- end;
- type
- PBmpSecondTableItem = ^TBmpSecondTableItem;
- function IndexOf(
- const AItem : PBmpSecondTableItem;
- const ATable : TBmpSecondTable;
- const ATableActualLength : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : PBmpSecondTableItem;
- begin
- Result := -1;
- if (ATableActualLength > 0) then begin
- p := @ATable[0];
- for i := 0 to ATableActualLength - 1 do begin
- if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- procedure MakeBmpTables(
- var AFirstTable : TBmpFirstTable;
- var ASecondTable : TBmpSecondTable;
- const ADataLineList : TDataLineRecArray
- );
- var
- locLowByte, locHighByte : Byte;
- locTableItem : TBmpSecondTableItem;
- locCP : TUnicodeCodePoint;
- i, locSecondActualLen : Integer;
- begin
- SetLength(ASecondTable,120);
- locSecondActualLen := 0;
- for locHighByte := 0 to 255 do begin
- FillChar(locTableItem,SizeOf(locTableItem),#0);
- for locLowByte := 0 to 255 do begin
- locCP := (locHighByte * 256) + locLowByte;
- locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
- end;
- i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
- if (i = -1) then begin
- if (locSecondActualLen = Length(ASecondTable)) then
- SetLength(ASecondTable,locSecondActualLen + 50);
- i := locSecondActualLen;
- ASecondTable[i] := locTableItem;
- Inc(locSecondActualLen);
- end;
- AFirstTable[locHighByte] := i;
- end;
- SetLength(ASecondTable,locSecondActualLen);
- end;
- type
- P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
- function IndexOf(
- const AItem : P3lvlBmp3TableItem;
- const ATable : T3lvlBmp3Table;
- const ATableActualLength : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : P3lvlBmp3TableItem;
- begin
- Result := -1;
- if (ATableActualLength > 0) then begin
- p := @ATable[0];
- for i := 0 to ATableActualLength - 1 do begin
- if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- type
- P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
- function IndexOf(
- const AItem : P3lvlBmp2TableItem;
- const ATable : T3lvlBmp2Table
- ) : Integer;overload;
- var
- i : Integer;
- p : P3lvlBmp2TableItem;
- begin
- Result := -1;
- if (Length(ATable) > 0) then begin
- p := @ATable[0];
- for i := 0 to Length(ATable) - 1 do begin
- if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- procedure MakeBmpTables3Levels(
- var AFirstTable : T3lvlBmp1Table;
- var ASecondTable : T3lvlBmp2Table;
- var AThirdTable : T3lvlBmp3Table;
- const ADataLineList : TDataLineRecArray
- );
- var
- locLowByte0, locLowByte1, locHighByte : Byte;
- locTableItem2 : T3lvlBmp2TableItem;
- locTableItem3 : T3lvlBmp3TableItem;
- locCP : TUnicodeCodePoint;
- i, locThirdActualLen : Integer;
- begin
- SetLength(AThirdTable,120);
- locThirdActualLen := 0;
- for locHighByte := 0 to 255 do begin
- FillChar(locTableItem2,SizeOf(locTableItem2),#0);
- for locLowByte0 := 0 to 15 do begin
- FillChar(locTableItem3,SizeOf(locTableItem3),#0);
- for locLowByte1 := 0 to 15 do begin
- locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
- locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
- end;
- i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
- if (i = -1) then begin
- if (locThirdActualLen = Length(AThirdTable)) then
- SetLength(AThirdTable,locThirdActualLen + 50);
- i := locThirdActualLen;
- AThirdTable[i] := locTableItem3;
- Inc(locThirdActualLen);
- end;
- locTableItem2[locLowByte0] := i;
- end;
- i := IndexOf(@locTableItem2,ASecondTable);
- if (i = -1) then begin
- i := Length(ASecondTable);
- SetLength(ASecondTable,(i + 1));
- ASecondTable[i] := locTableItem2;
- end;
- AFirstTable[locHighByte] := i;
- end;
- SetLength(AThirdTable,locThirdActualLen);
- end;
- procedure GenerateLicenceText(ADest : TStream);
- var
- s : ansistring;
- begin
- s := SLicenseText + sLineBreak + sLineBreak;
- ADest.Write(s[1],Length(s));
- end;
- procedure GenerateBmpTables(
- ADest : TStream;
- var AFirstTable : TBmpFirstTable;
- var ASecondTable : TBmpSecondTable
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, j, c : Integer;
- locLine : string;
- begin
- AddLine('const');
- AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
- locLine := '';
- for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
- locLine := locLine + IntToStr(AFirstTable[i]) + ',';
- if (((i+1) mod 16) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
- locLine := ' ' + locLine;
- AddLine(locLine);
- AddLine(' );' + sLineBreak);
- AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '';
- for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
- locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
- if (((j+1) mod 16) = 0) then begin
- if (i = c) and (j = 255) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- end;
- AddLine(' );' + sLineBreak);
- end;
- //----------------------------------
- procedure Generate3lvlBmpTables(
- ADest : TStream;
- var AFirstTable : T3lvlBmp1Table;
- var ASecondTable : T3lvlBmp2Table;
- var AThirdTable : T3lvlBmp3Table
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, j, c : Integer;
- locLine : string;
- begin
- AddLine('const');
- AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
- locLine := '';
- for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
- locLine := locLine + IntToStr(AFirstTable[i]) + ',';
- if (((i+1) mod 16) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
- locLine := ' ' + locLine;
- AddLine(locLine);
- AddLine(' );' + sLineBreak);
- AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '(';
- for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
- locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine + ')';
- if (i < c) then
- locLine := locLine + ',';
- AddLine(locLine);
- end;
- AddLine(' );' + sLineBreak);
- AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
- c := High(AThirdTable);
- for i := Low(AThirdTable) to c do begin
- locLine := '(';
- for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
- locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine + ')';
- if (i < c) then
- locLine := locLine + ',';
- AddLine(locLine);
- end;
- AddLine(' );' + sLineBreak);
- end;
- function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
- begin
- if (AEndian = ekBig) then
- Result := Format(
- '(c:$%s;b:$%s;a:$%s;)',
- [ IntToHex(AValue.byte2,1), IntToHex(AValue.byte1,1),
- IntToHex(AValue.byte0,1)
- ]
- )
- else
- Result := Format(
- '(a:$%s;b:$%s;c:$%s;)',
- [ IntToHex(AValue.byte0,1), IntToHex(AValue.byte1,1),
- IntToHex(AValue.byte2,1)
- ]
- );
- end;
- procedure GeneratePropTable(
- ADest : TStream;
- const APropList : TPropRecArray;
- const AEndian : TEndianKind
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i : Integer;
- locLine : string;
- p : PPropRec;
- begin
- AddLine('');
- AddLine('const');
- AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
- AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
- locLine := '';
- p := @APropList[0];
- for i := Low(APropList) to High(APropList) do begin //locLine := ' (CD:' + IntToStr(p^.CategoryData) + ';' +
- locLine := locLine + '(C:' + IntToStr(p^.CategoryData) + ';' +
- 'C3:' + IntToStr(p^.CCC) + ';' +
- 'N:' + IntToStr(p^.NumericIndex) + ';' +
- 'UC:' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
- 'LC:' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
- 'D:' + IntToStr(p^.DecompositionID) + ')';
- if (i < High(APropList)) then
- locLine := locLine + ',';
- if (((i+1) mod 2) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- Inc(p);
- end;
- if (locLine <> '') then
- AddLine( ' ' + locLine);
- AddLine(' );' + sLineBreak);
- end;
- procedure GenerateNumericTable(
- ADest : TStream;
- const ANumList : TNumericValueArray;
- const ACompleteUnit : Boolean
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i : Integer;
- locLine : string;
- p : ^TNumericValue;
- begin
- if ACompleteUnit then begin
- GenerateLicenceText(ADest);
- AddLine('unit unicodenumtable;');
- AddLine('interface');
- AddLine('');
- end;
- AddLine('');
- AddLine('const');
- AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
- AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
- locLine := '';
- p := @ANumList[0];
- for i := Low(ANumList) to High(ANumList) - 1 do begin
- locLine := locLine + FloatToStr(p^,FS) + ' ,';
- if (i > 0) and ((i mod 8) = 0) then begin
- AddLine(' ' + locLine);
- locLine := '';
- end;
- Inc(p);
- end;
- locLine := locLine + FloatToStr(p^,FS);
- AddLine(' ' + locLine);
- AddLine(' );' + sLineBreak);
- if ACompleteUnit then begin
- AddLine('');
- AddLine('implementation');
- AddLine('');
- AddLine('end.');
- end;
- end;
- procedure GenerateDecompositionBookTable(
- ADest : TStream;
- const ABook : TDecompositionBook;
- const AEndian : TEndianKind
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, k : Integer;
- p : ^TDecompositionIndexRec;
- cp : ^TUnicodeCodePoint;
- cp24 : UInt24;
- locLine : string;
- begin
- AddLine('const');
- AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
- AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
- AddLine('type');
- AddLine(' TDecompositionIndexRec = packed record');
- AddLine(' S : Word; //StartPosition');
- AddLine(' L : Byte; //Length');
- AddLine(' end;');
- AddLine(' TDecompositionBookRec = packed record');
- AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
- AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
- AddLine(' end;');
- AddLine('const');
- AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
- p := @ABook.Index[0];
- AddLine(' Index : (// Index BEGIN');
- k := 0;
- locLine := ' ';
- for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
- locLine := locLine + '(S:' + IntToStr(p^.StartPosition) + ';' +
- 'L:' + IntToStr(p^.Length) + '),';
- k := k + 1;
- if (k >= 9) then begin
- AddLine(locLine);
- locLine := ' ';
- k := 0;
- end;
- Inc(p);
- end;
- locLine := locLine + '(S:' + IntToStr(p^.StartPosition) + ';' +
- 'L:' + IntToStr(p^.Length) + ')';
- AddLine(locLine);
- AddLine(' ); // Index END');
- cp := @ABook.CodePoints[0];
- AddLine(' CodePoints : (// CodePoints BEGIN');
- k := 0;
- locLine := ' ';
- for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
- cp24 := cp^;
- locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
- Inc(k);
- if (k >= 16) then begin
- AddLine(locLine);
- k := 0;
- locLine := ' ';
- end;
- Inc(cp);
- end;
- cp24 := cp^;
- locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
- AddLine(locLine);
- AddLine(' ); // CodePoints END');
- AddLine(' );' + sLineBreak);
- end;
- procedure GenerateOutBmpTable(
- ADest : TStream;
- const AList : TDataLineRecArray
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, j : Integer;
- locLine : string;
- p : PDataLineRec;
- begin
- AddLine('');
- //AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
- //AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
- j := -1;
- p := @AList[0];
- for i := 0 to Length(AList) - 1 do begin
- if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
- (p^.StartCodePoint > $FFFF)
- then begin
- j := i;
- Break;
- end;
- Inc(p);
- end;
- if (j < 0) then
- exit;
- for i := j to Length(AList) - 2 do begin
- locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
- ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
- ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ;
- AddLine(locLine);
- Inc(p);
- end;
- locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
- ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
- ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ;
- AddLine(locLine);
- AddLine(' );' + sLineBreak);
- end;
- function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
- var
- k, i, locResLen : Integer;
- q, p, pr : PDataLineRec;
- k_end : TUnicodeCodePoint;
- begin
- locResLen := 1;
- SetLength(Result,Length(AData));
- FillChar(Result[0],Length(Result),#0);
- Result[0] := AData[0];
- q := @AData[0];
- k := 0;
- while (k < Length(AData)) do begin
- if (q^.LineType = 0) then
- k_end := q^.CodePoint
- else
- k_end := q^.EndCodePoint;
- if ((k+1) = Length(AData)) then begin
- i := k;
- end else begin
- p := @AData[k+1];
- i := k +1;
- while (i < (Length(AData) {- 1})) do begin
- if (p^.PropID <> q^.PropID) then begin
- i := i - 1;
- Break;
- end;
- if (p^.LineType = 0) then begin
- if (p^.CodePoint <> (k_end + 1)) then begin
- i := i - 1;
- Break;
- end;
- Inc(k_end);
- end else begin
- if (p^.StartCodePoint <> (k_end + 1)) then begin
- i := i - 1;
- Break;
- end;
- k_end := p^.EndCodePoint;
- end;
- Inc(i);
- Inc(p);
- end;
- end;
- {if (i = k) then begin
- Result[locResLen] := q^;
- Inc(locResLen);
- end else begin }
- p := @AData[i];
- pr := @Result[locResLen];
- pr^.PropID := q^.PropID;
- if (q^.LineType = 0) then
- pr^.StartCodePoint := q^.CodePoint
- else
- pr^.StartCodePoint := q^.StartCodePoint;
- pr^.LineType := 1;
- if (p^.LineType = 0) then
- pr^.EndCodePoint := p^.CodePoint
- else
- pr^.EndCodePoint := p^.EndCodePoint;
- Inc(locResLen);
- //end;
- k := i + 1;
- if (k = Length(AData)) then
- Break;
- q := @AData[k];
- end;
- SetLength(Result,locResLen);
- end;
- procedure ParseUCAFile(
- ADataAStream : TMemoryStream;
- var ABook : TUCA_DataBook
- );
- const
- LINE_LENGTH = 1024;
- DATA_LENGTH = 25000;
- var
- p : PAnsiChar;
- actualDataLen : Integer;
- bufferLength, bufferPos, lineLength, linePos : Integer;
- line : ansistring;
- function NextLine() : Boolean;
- var
- locOldPos : Integer;
- locOldPointer : PAnsiChar;
- begin
- Result := False;
- locOldPointer := p;
- locOldPos := bufferPos;
- while (bufferPos < bufferLength) and (p^ <> #10) do begin
- Inc(p);
- Inc(bufferPos);
- end;
- if (locOldPos = bufferPos) and (p^ = #10) then begin
- lineLength := 0;
- Inc(p);
- Inc(bufferPos);
- linePos := 1;
- Result := True;
- end else if (locOldPos < bufferPos) then begin
- lineLength := (bufferPos - locOldPos) + 1;
- Move(locOldPointer^,line[1],lineLength);
- if (p^ = #10) then begin
- Dec(lineLength);
- Inc(p);
- Inc(bufferPos);
- end;
- linePos := 1;
- Result := True;
- end;
- end;
- procedure SkipSpace();
- begin
- while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
- Inc(linePos);
- end;
- function NextToken() : ansistring;
- const C_SEPARATORS = [';','#','.','[',']','*','@'];
- var
- k : Integer;
- begin
- SkipSpace();
- k := linePos;
- if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
- Result := line[linePos];
- Inc(linePos);
- exit;
- end;
- while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
- Inc(linePos);
- if (linePos > k) then begin
- if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
- Result := Copy(line,k,(linePos-k))
- else
- Result := Copy(line,k,(linePos-k+1));
- Result := Trim(Result);
- end else begin
- Result := '';
- end;
- end;
- procedure CheckToken(const AToken : string);
- var
- a, b : string;
- begin
- a := LowerCase(Trim(AToken));
- b := LowerCase(Trim(NextToken()));
- if (a <> b) then
- raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]);
- end;
- function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
- var
- s :AnsiString;
- k : Integer;
- begin
- Result := False;
- s := NextToken();
- if (s <> '[') then
- exit;
- s := NextToken();
- if (s = '.') then
- ADest.Variable := False
- else begin
- if (s <> '*') then
- raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
- ADest.Variable := True;
- end;
- ADest.Weights[0] := StrToInt('$'+NextToken());
- for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin
- CheckToken('.');
- ADest.Weights[k] := StrToInt('$'+NextToken());
- end;
- CheckToken(']');
- Result := True;
- end;
- procedure ParseHeaderVar();
- var
- s,ss : string;
- k : Integer;
- begin
- s := NextToken();
- if (s = 'version') then begin
- ss := '';
- while True do begin
- s := NextToken();
- if (s = '') then
- Break;
- ss := ss + s;
- end;
- ABook.Version := ss;
- end else if (s = 'variable') then begin
- if (s = 'blanked') then
- ABook.VariableWeight := ucaBlanked
- else if (s = 'non-ignorable') then
- ABook.VariableWeight := ucaNonIgnorable
- else if (s = 'shifted') then
- ABook.VariableWeight := ucaShifted
- else if (s = 'shift-trimmed') then
- ABook.VariableWeight := ucaShiftedTrimmed
- else if (s = 'ignoresp') then
- ABook.VariableWeight := ucaIgnoreSP
- else
- raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
- end else if (s = 'backwards') or (s = 'forwards') then begin
- ss := s;
- s := NextToken();
- k := StrToInt(s);
- if (k < 1) or (k > 4) then
- raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
- ABook.Backwards[k] := (s = 'backwards');
- end;
- end;
- procedure ParseLine();
- var
- locData : ^TUCA_LineRec;
- s : ansistring;
- kc : Integer;
- begin
- if (Length(ABook.Lines) <= actualDataLen) then
- SetLength(ABook.Lines,Length(ABook.Lines)*2);
- locData := @ABook.Lines[actualDataLen];
- s := NextToken();
- if (s = '') or (s[1] = '#') then
- exit;
- if (s[1] = '@') then begin
- ParseHeaderVar();
- exit;
- end;
- SetLength(locData^.CodePoints,10);
- locData^.CodePoints[0] := StrToInt('$'+s);
- kc := 1;
- while True do begin
- s := Trim(NextToken());
- if (s = '') then
- exit;
- if (s = ';') then
- Break;
- locData^.CodePoints[kc] := StrToInt('$'+s);
- Inc(kc);
- end;
- if (kc = 0) then
- exit;
- SetLength(locData^.CodePoints,kc);
- SetLength(locData^.Weights,24);
- kc := 0;
- while ReadWeightBlock(locData^.Weights[kc]) do begin
- Inc(kc);
- end;
- SetLength(locData^.Weights,kc);
- Inc(actualDataLen);
- end;
- procedure Prepare();
- var
- k : Integer;
- begin
- ABook.VariableWeight := ucaShifted;
- for k := Low(ABook.Backwards) to High(ABook.Backwards) do
- ABook.Backwards[k] := False;
- SetLength(ABook.Lines,DATA_LENGTH);
- actualDataLen := 0;
- bufferLength := ADataAStream.Size;
- bufferPos := 0;
- p := ADataAStream.Memory;
- lineLength := 0;
- SetLength(line,LINE_LENGTH);
- end;
- begin
- Prepare();
- while NextLine() do
- ParseLine();
- SetLength(ABook.Lines,actualDataLen);
- end;
- procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
- var
- i : Integer;
- begin
- Write(ATitle, ' ');
- for i := 0 to Length(X) - 1 do
- Write(X[i],' ');
- WriteLn();
- end;
- function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
- var
- i, hb : Integer;
- begin
- if (A=B) then
- exit(0);
- Result := 1;
- hb := Length(B^.CodePoints) - 1;
- for i := 0 to Length(A^.CodePoints) - 1 do begin
- if (i > hb) then
- exit;
- if (A^.CodePoints[i] < B^.CodePoints[i]) then
- exit(-1);
- if (A^.CodePoints[i] > B^.CodePoints[i]) then
- exit(1);
- end;
- if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
- exit(0);
- exit(-1);
- end;
- procedure QuickSort(
- var AList : TUCA_DataBookIndex;
- L, R : Longint;
- ABook : PUCA_DataBook
- );overload;
- var
- I, J : Longint;
- P, Q : Integer;
- begin
- repeat
- I := L;
- J := R;
- P := AList[ (L + R) div 2 ];
- repeat
- while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
- I := I + 1;
- while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
- J := J - 1;
- If I <= J then
- begin
- Q := AList[I];
- AList[I] := AList[J];
- AList[J] := Q;
- I := I + 1;
- J := J - 1;
- end;
- until I > J;
- // sort the smaller range recursively
- // sort the bigger range via the loop
- // Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
- if J - L < R - I then
- begin
- if L < J then
- QuickSort(AList, L, J, ABook);
- L := I;
- end
- else
- begin
- if I < R then
- QuickSort(AList, I, R, ABook);
- R := J;
- end;
- until L >= R;
- end;
- function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
- var
- r : TUCA_DataBookIndex;
- i, c : Integer;
- begin
- c := Length(ABook^.Lines);
- SetLength(r,c);
- for i := 0 to c - 1 do
- r[i] := i;
- QuickSort(r,0,c-1,ABook);
- Result := r;
- end;
- function ConstructContextTree(
- const AContext : PUCA_LineContextRec;
- var ADestBuffer;
- const ADestBufferLength : Cardinal
- ) : PUCA_PropItemContextTreeRec;forward;
- function ConstructItem(
- AItem : PUCA_PropItemRec;
- ACodePoint : Cardinal;
- AValid : Byte;
- AChildCount : Byte;
- const AWeights : array of TUCA_WeightRec;
- const AStoreCP : Boolean;
- const AContext : PUCA_LineContextRec;
- const ADeleted : Boolean
- ) : Cardinal;
- var
- i : Integer;
- p : PUCA_PropItemRec;
- pw : PUCA_PropWeights;
- pb : PByte;
- hasContext : Boolean;
- contextTree : PUCA_PropItemContextTreeRec;
- wl : Integer;
- begin
- p := AItem;
- p^.Size := 0;
- p^.Flags := 0;
- p^.WeightLength := 0;
- SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
- p^.ChildCount := AChildCount;
- hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
- if hasContext then
- wl := 0
- else
- wl := Length(AWeights);
- p^.WeightLength := wl;
- if (wl = 0) then begin
- Result := SizeOf(TUCA_PropItemRec);
- if ADeleted then
- SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
- end else begin
- Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
- pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
- Unaligned(PWord(pb)^) := AWeights[0].Weights[0];
- pb := pb + 2;
- if (AWeights[0].Weights[1] > High(Byte)) then begin
- Unaligned(PWord(pb)^) := AWeights[0].Weights[1];
- pb := pb + 2;
- end else begin
- SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
- pb^ := AWeights[0].Weights[1];
- pb := pb + 1;
- Result := Result - 1;
- end;
- if (AWeights[0].Weights[2] > High(Byte)) then begin
- Unaligned(PWord(pb)^) := AWeights[0].Weights[2];
- pb := pb + 2;
- end else begin
- SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
- pb^ := AWeights[0].Weights[2];
- pb := pb + 1;
- Result := Result - 1;
- end;
- pw := PUCA_PropWeights(pb);
- for i := 1 to wl - 1 do begin
- pw^.Weights[0] := AWeights[i].Weights[0];
- pw^.Weights[1] := AWeights[i].Weights[1];
- pw^.Weights[2] := AWeights[i].Weights[2];
- //pw^.Variable := BoolToByte(AWeights[i].Variable);
- Inc(pw);
- end;
- end;
- hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
- if AStoreCP or hasContext then begin
- Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint;
- Result := Result + SizeOf(UInt24);
- SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
- end;
- if hasContext then begin
- contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt);
- Result := Result + Cardinal(contextTree^.Size);
- SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
- end;
- p^.Size := Result;
- end;
- function CalcCharChildCount(
- const ASearchStartPos : Integer;
- const ALinePos : Integer;
- const ABookLines : PUCA_LineRec;
- const AMaxLength : Integer;
- const ABookIndex : TUCA_DataBookIndex;
- out ALineCount : Word
- ) : Byte;
- var
- locLinePos : Integer;
- p : PUCA_LineRec;
- procedure IncP();
- begin
- Inc(locLinePos);
- p := @ABookLines[ABookIndex[locLinePos]];
- end;
- var
- i, locTargetLen, locTargetBufferSize, r : Integer;
- locTarget : array[0..127] of Cardinal;
- locLastChar : Cardinal;
- begin
- locLinePos := ALinePos;
- p := @ABookLines[ABookIndex[locLinePos]];
- locTargetLen := ASearchStartPos;
- locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
- Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
- if (Length(p^.CodePoints) = ASearchStartPos) then begin
- r := 0;
- locLastChar := High(Cardinal);
- end else begin
- r := 1;
- locLastChar := p^.CodePoints[ASearchStartPos];
- end;
- i := 1;
- while (i < AMaxLength) do begin
- IncP();
- if (Length(p^.CodePoints) < locTargetLen) then
- Break;
- if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
- Break;
- if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
- Inc(r);
- locLastChar := p^.CodePoints[ASearchStartPos];
- end;
- Inc(i);
- end;
- ALineCount := i;
- Result := r;
- end;
- function BuildTrie(
- const ALinePos : Integer;
- const ABookLines : PUCA_LineRec;
- const AMaxLength : Integer;
- const ABookIndex : TUCA_DataBookIndex
- ) : PTrieNode;
- var
- p : PUCA_LineRec;
- root : PTrieNode;
- ki, k, i : Integer;
- key : array of TKeyType;
- begin
- k := ABookIndex[ALinePos];
- p := @ABookLines[k];
- if (Length(p^.CodePoints) = 1) then
- root := CreateNode(p^.CodePoints[0],k)
- else
- root := CreateNode(p^.CodePoints[0]);
- for i := ALinePos to ALinePos + AMaxLength - 1 do begin
- k := ABookIndex[i];
- p := @ABookLines[k];
- if (Length(p^.CodePoints) = 1) then begin
- InsertWord(root,p^.CodePoints[0],k);
- end else begin
- SetLength(key,Length(p^.CodePoints));
- for ki := 0 to Length(p^.CodePoints) - 1 do
- key[ki] := p^.CodePoints[ki];
- InsertWord(root,key,k);
- end;
- end;
- Result := root;
- end;
- function BoolToByte(AValue : Boolean): Byte;inline;
- begin
- if AValue then
- Result := 1
- else
- Result := 0;
- end;
- function InternalConstructFromTrie(
- const ATrie : PTrieNode;
- const AItem : PUCA_PropItemRec;
- const ALines : PUCA_LineRec;
- const AStoreCp : Boolean
- ) : Cardinal;
- var
- i : Integer;
- size : Cardinal;
- p : PUCA_PropItemRec;
- n : PTrieNode;
- begin
- if (ATrie = nil) then
- exit(0);
- p := AItem;
- n := ATrie;
- if n^.DataNode then
- size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
- else
- size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
- Result := size;
- if (n^.ChildCount > 0) then begin
- for i := 0 to n^.ChildCount - 1 do begin
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
- Result := Result + size;
- end;
- end;
- AItem^.Size := Result;
- end;
- function ConstructFromTrie(
- const ATrie : PTrieNode;
- const AItem : PUCA_PropItemRec;
- const ALines : PUCA_LineRec
- ) : Integer;
- begin
- Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
- end;
- procedure MakeUCA_Props(
- ABook : PUCA_DataBook;
- out AProps : PUCA_PropBook
- );
- var
- propIndexCount : Integer;
- procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
- begin
- AProps^.Index[propIndexCount].CodePoint := ACodePoint;
- AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
- propIndexCount := propIndexCount + 1;
- end;
- var
- locIndex : TUCA_DataBookIndex;
- i, c, k, kc : Integer;
- p, p1, p2 : PUCA_PropItemRec;
- lines, pl1, pl2 : PUCA_LineRec;
- childCount, lineCount : Word;
- size : Cardinal;
- trieRoot : PTrieNode;
- MaxChildCount, MaxSize : Cardinal;
- childList : array of PUCA_PropItemRec;
- begin
- locIndex := CreateIndex(ABook);
- i := Length(ABook^.Lines);
- i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
- AProps := AllocMem(SizeOf(TUCA_PropBook));
- AProps^.ItemSize := i;
- AProps^.Items := AllocMem(i);
- propIndexCount := 0;
- SetLength(AProps^.Index,Length(ABook^.Lines));
- p := AProps^.Items;
- lines := @ABook^.Lines[0];
- c := Length(locIndex);
- i := 0;
- MaxChildCount := 0; MaxSize := 0;
- while (i < (c-1)) do begin
- pl1 := @lines[locIndex[i]];
- if not pl1^.Stored then begin
- i := i + 1;
- Continue;
- end;
- pl2 := @lines[locIndex[i+1]];
- if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
- if (Length(pl1^.CodePoints) = 1) then begin
- size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
- CapturePropIndex(p,pl1^.CodePoints[0]);
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- if (size > MaxSize) then
- MaxSize := size;
- end else begin
- kc := Length(pl1^.CodePoints);
- SetLength(childList,kc);
- for k := 0 to kc - 2 do begin
- size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
- if (k = 0) then
- CapturePropIndex(p,pl1^.CodePoints[k]);
- childList[k] := p;
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- end;
- size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
- childList[kc-1] := p;
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- for k := kc - 2 downto 0 do begin
- p1 := childList[k];
- p2 := childList[k+1];
- p1^.Size := p1^.Size + p2^.Size;
- end;
- if (p1^.Size > MaxSize) then
- MaxSize := p1^.Size;
- end;
- lineCount := 1;
- end else begin
- childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
- if (childCount < 1) then
- raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
- if (lineCount < 2) then
- raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
- if (childCount > MaxChildCount) then
- MaxChildCount := childCount;
- trieRoot := BuildTrie(i,lines,lineCount,locIndex);
- size := ConstructFromTrie(trieRoot,p,lines);
- CapturePropIndex(p,pl1^.CodePoints[0]);
- FreeNode(trieRoot);
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- if (size > MaxSize) then
- MaxSize := size;
- end;
- i := i + lineCount;
- end;
- if (i = (c-1)) then begin
- pl1 := @lines[locIndex[i]];
- if (Length(pl1^.CodePoints) = 1) then begin
- size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
- CapturePropIndex(p,pl1^.CodePoints[0]);
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- if (size > MaxSize) then
- MaxSize := size;
- end else begin
- kc := Length(pl1^.CodePoints);
- SetLength(childList,kc);
- for k := 0 to kc - 2 do begin
- size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
- if (k = 0) then
- CapturePropIndex(p,pl1^.CodePoints[0]);
- childList[k] := p;
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- end;
- size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
- childList[kc-1] := p;
- p := PUCA_PropItemRec(PtrUInt(p) + size);
- for i := kc - 2 downto 0 do begin
- p1 := childList[i];
- p2 := childList[i+1];
- p1^.Size := p1^.Size + p2^.Size;
- end;
- if (size > MaxSize) then
- MaxSize := size;
- end;
- end;
- //c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
- c := UInt64(PtrUInt(p)) - UInt64(PtrUInt(AProps^.Items));
- ReAllocMem(AProps^.Items,c);
- AProps^.ItemSize := c;
- SetLength(AProps^.Index,propIndexCount);
- AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
- ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
- k := 0;
- c := High(Word);
- for i := 0 to Length(ABook^.Lines) - 1 do begin
- if (Length(ABook^.Lines[i].Weights) > 0) then begin
- if (ABook^.Lines[i].Weights[0].Variable) then begin
- if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
- k := ABook^.Lines[i].Weights[0].Weights[0];
- if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
- c := ABook^.Lines[i].Weights[0].Weights[0];
- end;
- end;
- end;
- AProps^.VariableHighLimit := k;
- AProps^.VariableLowLimit := c;
- end;
- procedure FreeUcaBook(var ABook : PUCA_PropBook);
- var
- p : PUCA_PropBook;
- begin
- if (ABook = nil) then
- exit;
- p := ABook;
- ABook := nil;
- p^.Index := nil;
- FreeMem(p^.Items,p^.ItemSize);
- FreeMem(p^.ItemsOtherEndian,p^.ItemSize);
- FreeMem(p,SizeOf(p^));
- end;
- function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
- var
- i : Integer;
- begin
- for i := 0 to Length(APropBook^.Index) - 1 do begin
- if (ACodePoint = APropBook^.Index[i].CodePoint) then
- exit(i);
- end;
- Result := -1;
- end;
- type
- PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
- function IndexOf(
- const AItem : PucaBmpSecondTableItem;
- const ATable : TucaBmpSecondTable;
- const ATableActualLength : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : PucaBmpSecondTableItem;
- begin
- Result := -1;
- if (ATableActualLength > 0) then begin
- p := @ATable[0];
- for i := 0 to ATableActualLength - 1 do begin
- if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- procedure MakeUCA_BmpTables(
- var AFirstTable : TucaBmpFirstTable;
- var ASecondTable : TucaBmpSecondTable;
- const APropBook : PUCA_PropBook
- );
- var
- locLowByte, locHighByte : Byte;
- locTableItem : TucaBmpSecondTableItem;
- locCP : TUnicodeCodePoint;
- i, locSecondActualLen : Integer;
- k : Integer;
- begin
- SetLength(ASecondTable,120);
- locSecondActualLen := 0;
- for locHighByte := 0 to 255 do begin
- FillChar(locTableItem,SizeOf(locTableItem),#0);
- for locLowByte := 0 to 255 do begin
- locCP := (locHighByte * 256) + locLowByte;
- k := IndexOf(locCP,APropBook);
- if (k = -1) then
- k := 0
- else
- k := APropBook^.Index[k].Position + 1;
- locTableItem[locLowByte] := k;
- end;
- i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
- if (i = -1) then begin
- if (locSecondActualLen = Length(ASecondTable)) then
- SetLength(ASecondTable,locSecondActualLen + 50);
- i := locSecondActualLen;
- ASecondTable[i] := locTableItem;
- Inc(locSecondActualLen);
- end;
- AFirstTable[locHighByte] := i;
- end;
- SetLength(ASecondTable,locSecondActualLen);
- end;
- function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
- begin
- //copied from utf16toutf32
- Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
- end;
- procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
- begin
- AHighS := Word((AValue - $10000) shr 10 + $d800);
- ALowS := Word((AValue - $10000) and $3ff + $dc00);
- end;
- type
- PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
- function IndexOf(
- const AItem : PucaOBmpSecondTableItem;
- const ATable : TucaOBmpSecondTable;
- const ATableActualLength : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : PucaOBmpSecondTableItem;
- begin
- Result := -1;
- if (ATableActualLength > 0) then begin
- p := @ATable[0];
- for i := 0 to ATableActualLength - 1 do begin
- if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- procedure MakeUCA_OBmpTables(
- var AFirstTable : TucaOBmpFirstTable;
- var ASecondTable : TucaOBmpSecondTable;
- const APropBook : PUCA_PropBook
- );
- var
- locLowByte, locHighByte : Word;
- locTableItem : TucaOBmpSecondTableItem;
- locCP : TUnicodeCodePoint;
- i, locSecondActualLen : Integer;
- k : Integer;
- begin
- if (Length(ASecondTable) = 0) then
- SetLength(ASecondTable,2000);
- locSecondActualLen := 0;
- for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
- FillChar(locTableItem,SizeOf(locTableItem),#0);
- for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
- locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
- k := IndexOf(locCP,APropBook);
- if (k = -1) then
- k := 0
- else
- k := APropBook^.Index[k].Position + 1;
- locTableItem[locLowByte] := k;
- end;
- i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
- if (i = -1) then begin
- if (locSecondActualLen = Length(ASecondTable)) then
- SetLength(ASecondTable,locSecondActualLen + 50);
- i := locSecondActualLen;
- ASecondTable[i] := locTableItem;
- Inc(locSecondActualLen);
- end;
- AFirstTable[locHighByte] := i;
- end;
- SetLength(ASecondTable,locSecondActualLen);
- end;
- function GetPropPosition(
- const AHighS,
- ALowS : Word;
- const AFirstTable : PucaOBmpFirstTable;
- const ASecondTable : PucaOBmpSecondTable
- ): Integer;inline;overload;
- begin
- Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
- end;
- procedure GenerateUCA_Head(
- ADest : TStream;
- ABook : PUCA_DataBook;
- AProps : PUCA_PropBook
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- begin
- AddLine('const');
- //AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
- AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
- AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
- AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
- AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
- AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
- AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
- AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
- AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
- AddLine('');
- end;
- procedure GenerateUCA_BmpTables(
- AStream,
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaBmpFirstTable;
- var ASecondTable : TucaBmpSecondTable
- );
- procedure AddLine(AOut : TStream; const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- AOut.Write(buffer[1],Length(buffer));
- end;
- var
- i, j, c : Integer;
- locLine : string;
- value : UInt24;
- begin
- AddLine(AStream,'const');
- AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = (');
- locLine := '';
- for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
- locLine := locLine + IntToStr(AFirstTable[i]) + ',';
- if (((i+1) mod 16) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(AStream,locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
- locLine := ' ' + locLine;
- AddLine(AStream,locLine);
- AddLine(AStream,' );' + sLineBreak);
- AddLine(ANativeEndianStream,'const');
- AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '';
- for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
- value := ASecondTable[i][j];
- locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
- if (((j+1) mod 7) = 0) then begin
- if (i = c) and (j = High(TucaBmpSecondTableItem)) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANativeEndianStream,locLine);
- locLine := '';
- end;
- end;
- if (locLine <> '') then begin
- if (i = c) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANativeEndianStream,locLine);
- end;
- end;
- AddLine(ANativeEndianStream,' );' + sLineBreak);
- AddLine(ANonNativeEndianStream,'const');
- AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '';
- for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
- value := ASecondTable[i][j];
- locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
- if (((j+1) mod 7) = 0) then begin
- if (i = c) and (j = High(TucaBmpSecondTableItem)) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANonNativeEndianStream,locLine);
- locLine := '';
- end;
- end;
- if (locLine <> '') then begin
- if (i = c) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANonNativeEndianStream,locLine);
- end;
- end;
- AddLine(ANonNativeEndianStream,' );' + sLineBreak);
- end;
- procedure GenerateBinaryUCA_BmpTables(
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaBmpFirstTable;
- var ASecondTable : TucaBmpSecondTable
- );
- var
- i, j : Integer;
- value : UInt24;
- begin
- ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
- ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
- for i := Low(ASecondTable) to High(ASecondTable) do begin
- for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
- value := ASecondTable[i][j];
- ANativeEndianStream.Write(value,SizeOf(value));
- ReverseBytes(value,SizeOf(value));
- ANonNativeEndianStream.Write(value,SizeOf(value));
- end;
- end;
- end;
- procedure GenerateUCA_PropTable(
- // WARNING : files must be generated for each endianess (Little / Big)
- ADest : TStream;
- const APropBook : PUCA_PropBook;
- const AEndian : TEndianKind
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, c : Integer;
- locLine : string;
- p : PByte;
- begin
- c := APropBook^.ItemSize;
- AddLine('const');
- AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
- locLine := '';
- if (AEndian = ENDIAN_NATIVE) then
- p := PByte(APropBook^.Items)
- else
- p := PByte(APropBook^.ItemsOtherEndian);
- for i := 0 to c - 2 do begin
- locLine := locLine + IntToStr(p[i]) + ',';
- if (((i+1) mod 60) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(p[c-1]);
- locLine := ' ' + locLine;
- AddLine(locLine);
- AddLine(' );' + sLineBreak);
- end;
- procedure GenerateBinaryUCA_PropTable(
- // WARNING : files must be generated for each endianess (Little / Big)
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- const APropBook : PUCA_PropBook
- );
- begin
- ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
- ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
- end;
- procedure GenerateUCA_OBmpTables(
- AStream,
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaOBmpFirstTable;
- var ASecondTable : TucaOBmpSecondTable
- );
- procedure AddLine(AOut : TStream; const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- AOut.Write(buffer[1],Length(buffer));
- end;
- var
- i, j, c : Integer;
- locLine : string;
- value : UInt24;
- begin
- AddLine(AStream,'const');
- AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
- locLine := '';
- for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
- locLine := locLine + IntToStr(AFirstTable[i]) + ',';
- if (((i+1) mod 16) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(AStream,locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
- locLine := ' ' + locLine;
- AddLine(AStream,locLine);
- AddLine(AStream,' );' + sLineBreak);
- AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '';
- for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
- value := ASecondTable[i][j];
- locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
- if (((j+1) mod 7) = 0) then begin
- if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANativeEndianStream,locLine);
- locLine := '';
- end;
- end;
- if (locLine <> '') then begin
- if (i = c) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANativeEndianStream,locLine);
- end;
- end;
- AddLine(ANativeEndianStream,' );' + sLineBreak);
- AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '';
- for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
- value := ASecondTable[i][j];
- locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
- if (((j+1) mod 7) = 0) then begin
- if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANonNativeEndianStream,locLine);
- locLine := '';
- end;
- end;
- if (locLine <> '') then begin
- if (i = c) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(ANonNativeEndianStream,locLine);
- end;
- end;
- AddLine(ANonNativeEndianStream,' );' + sLineBreak);
- end;
- procedure GenerateBinaryUCA_OBmpTables(
- ANativeEndianStream,
- ANonNativeEndianStream : TStream;
- var AFirstTable : TucaOBmpFirstTable;
- var ASecondTable : TucaOBmpSecondTable
- );
- var
- i, j : Integer;
- locLine : string;
- wordValue : Word;
- value : UInt24;
- begin
- for i := Low(AFirstTable) to High(AFirstTable) do begin
- wordValue := AFirstTable[i];
- ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
- ReverseBytes(wordValue,SizeOf(wordValue));
- ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
- end;
- for i := Low(ASecondTable) to High(ASecondTable) do begin
- for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
- value := ASecondTable[i][j];
- ANativeEndianStream.Write(value,SizeOf(value));
- ReverseBytes(value,SizeOf(value));
- ANonNativeEndianStream.Write(value,SizeOf(value));
- end;
- end;
- end;
- type
- POBmpSecondTableItem = ^TOBmpSecondTableItem;
- function IndexOf(
- const AItem : POBmpSecondTableItem;
- const ATable : TOBmpSecondTable;
- const ATableActualLength : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : POBmpSecondTableItem;
- begin
- Result := -1;
- if (ATableActualLength > 0) then begin
- p := @ATable[0];
- for i := 0 to ATableActualLength - 1 do begin
- if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- procedure MakeOBmpTables(
- var AFirstTable : TOBmpFirstTable;
- var ASecondTable : TOBmpSecondTable;
- const ADataLineList : TDataLineRecArray
- );
- var
- locLowByte, locHighByte : Word;
- locTableItem : TOBmpSecondTableItem;
- locCP : TUnicodeCodePoint;
- i, locSecondActualLen : Integer;
- begin
- SetLength(ASecondTable,2000);
- locSecondActualLen := 0;
- for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
- FillChar(locTableItem,SizeOf(locTableItem),#0);
- for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
- locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
- locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
- end;
- i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
- if (i = -1) then begin
- if (locSecondActualLen = Length(ASecondTable)) then
- SetLength(ASecondTable,locSecondActualLen + 50);
- i := locSecondActualLen;
- ASecondTable[i] := locTableItem;
- Inc(locSecondActualLen);
- end;
- AFirstTable[locHighByte] := i;
- end;
- SetLength(ASecondTable,locSecondActualLen);
- end;
- type
- P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
- function IndexOf(
- const AItem : P3lvlOBmp3TableItem;
- const ATable : T3lvlOBmp3Table;
- const ATableActualLength : Integer
- ) : Integer;overload;
- var
- i : Integer;
- p : P3lvlOBmp3TableItem;
- begin
- Result := -1;
- if (ATableActualLength > 0) then begin
- p := @ATable[0];
- for i := 0 to ATableActualLength - 1 do begin
- if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- type
- P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
- function IndexOf(
- const AItem : P3lvlOBmp2TableItem;
- const ATable : T3lvlOBmp2Table
- ) : Integer;overload;
- var
- i : Integer;
- p : P3lvlOBmp2TableItem;
- begin
- Result := -1;
- if (Length(ATable) > 0) then begin
- p := @ATable[0];
- for i := 0 to Length(ATable) - 1 do begin
- if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
- Result := i;
- Break;
- end;
- Inc(p);
- end;
- end;
- end;
- procedure MakeOBmpTables3Levels(
- var AFirstTable : T3lvlOBmp1Table;
- var ASecondTable : T3lvlOBmp2Table;
- var AThirdTable : T3lvlOBmp3Table;
- const ADataLineList : TDataLineRecArray
- );
- var
- locLowByte0, locLowByte1, locHighByte : Word;
- locTableItem2 : T3lvlOBmp2TableItem;
- locTableItem3 : T3lvlOBmp3TableItem;
- locCP : TUnicodeCodePoint;
- i, locThirdActualLen : Integer;
- begin
- SetLength(AThirdTable,120);
- locThirdActualLen := 0;
- for locHighByte := 0 to 1023 do begin
- FillChar(locTableItem2,SizeOf(locTableItem2),#0);
- for locLowByte0 := 0 to 31 do begin
- FillChar(locTableItem3,SizeOf(locTableItem3),#0);
- for locLowByte1 := 0 to 31 do begin
- locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
- locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
- end;
- i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
- if (i = -1) then begin
- if (locThirdActualLen = Length(AThirdTable)) then
- SetLength(AThirdTable,locThirdActualLen + 50);
- i := locThirdActualLen;
- AThirdTable[i] := locTableItem3;
- Inc(locThirdActualLen);
- end;
- locTableItem2[locLowByte0] := i;
- end;
- i := IndexOf(@locTableItem2,ASecondTable);
- if (i = -1) then begin
- i := Length(ASecondTable);
- SetLength(ASecondTable,(i + 1));
- ASecondTable[i] := locTableItem2;
- end;
- AFirstTable[locHighByte] := i;
- end;
- SetLength(AThirdTable,locThirdActualLen);
- end;
- procedure GenerateOBmpTables(
- ADest : TStream;
- var AFirstTable : TOBmpFirstTable;
- var ASecondTable : TOBmpSecondTable
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, j, c : Integer;
- locLine : string;
- begin
- AddLine('const');
- AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
- locLine := '';
- for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
- locLine := locLine + IntToStr(AFirstTable[i]) + ',';
- if (((i+1) mod 20) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
- locLine := ' ' + locLine;
- AddLine(locLine);
- AddLine(' );' + sLineBreak);
- AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '';
- for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
- locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
- if (((j+1) mod 16) = 0) then begin
- if (i = c) and (j = High(TOBmpSecondTableItem)) then
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- end;
- AddLine(' );' + sLineBreak);
- end;
- //----------------------------------
- procedure Generate3lvlOBmpTables(
- ADest : TStream;
- var AFirstTable : T3lvlOBmp1Table;
- var ASecondTable : T3lvlOBmp2Table;
- var AThirdTable : T3lvlOBmp3Table
- );
- procedure AddLine(const ALine : ansistring);
- var
- buffer : ansistring;
- begin
- buffer := ALine + sLineBreak;
- ADest.Write(buffer[1],Length(buffer));
- end;
- var
- i, j, c : Integer;
- locLine : string;
- begin
- AddLine('const');
- AddLine(' UCO_TABLE_1 : array[0..1023] of Word = (');
- locLine := '';
- for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
- locLine := locLine + IntToStr(AFirstTable[i]) + ',';
- if (((i+1) mod 20) = 0) then begin
- locLine := ' ' + locLine;
- AddLine(locLine);
- locLine := '';
- end;
- end;
- locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
- locLine := ' ' + locLine;
- AddLine(locLine);
- AddLine(' );' + sLineBreak);
- AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
- c := High(ASecondTable);
- for i := Low(ASecondTable) to c do begin
- locLine := '(';
- for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
- locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine + ')';
- if (i < c) then
- locLine := locLine + ',';
- AddLine(locLine);
- end;
- AddLine(' );' + sLineBreak);
- AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
- c := High(AThirdTable);
- for i := Low(AThirdTable) to c do begin
- locLine := '(';
- for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
- locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
- Delete(locLine,Length(locLine),1);
- locLine := ' ' + locLine + ')';
- if (i < c) then
- locLine := locLine + ',';
- AddLine(locLine);
- end;
- AddLine(' );' + sLineBreak);
- end;
- function GetProp(
- const AHighS,
- ALowS : Word;
- const AProps : TPropRecArray;
- var AFirstTable : TOBmpFirstTable;
- var ASecondTable : TOBmpSecondTable
- ): PPropRec;
- begin
- Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
- end;
- function GetProp(
- const AHighS,
- ALowS : Word;
- const AProps : TPropRecArray;
- var AFirstTable : T3lvlOBmp1Table;
- var ASecondTable : T3lvlOBmp2Table;
- var AThirdTable : T3lvlOBmp3Table
- ): PPropRec;
- begin
- Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
- //Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
- end;
- { TUCA_PropItemContextTreeRec }
- function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
- begin
- if (Size = 0) then
- Result := nil
- else
- Result := PUCA_PropItemContextTreeNodeRec(
- PtrUInt(
- PtrUInt(@Self) + SizeOf(UInt24){Size}
- )
- );
- end;
- { TUCA_LineContextRec }
- procedure TUCA_LineContextRec.Clear;
- begin
- Data := nil
- end;
- procedure TUCA_LineContextRec.Assign(ASource : PUCA_LineContextRec);
- var
- c, i : Integer;
- begin
- if (ASource = nil) then begin
- Clear();
- exit;
- end;
- c := Length(ASource^.Data);
- SetLength(Self.Data,c);
- for i := 0 to c-1 do
- Self.Data[i].Assign(@ASource^.Data[i]);
- end;
- function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
- begin
- Result.Clear();
- Result.Assign(@Self);
- end;
- { TUCA_LineContextItemRec }
- procedure TUCA_LineContextItemRec.Clear();
- begin
- CodePoints := nil;
- Weights := nil;
- end;
- procedure TUCA_LineContextItemRec.Assign(ASource : PUCA_LineContextItemRec);
- begin
- if (ASource = nil) then begin
- Clear();
- exit;
- end;
- Self.CodePoints := Copy(ASource^.CodePoints);
- Self.Weights := Copy(ASource^.Weights);
- end;
- function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
- begin
- Result.Clear();
- Result.Assign(@Self);
- end;
- { TUCA_LineRec }
- procedure TUCA_LineRec.Clear;
- begin
- CodePoints := nil;
- Weights := nil;
- Deleted := False;
- Stored := False;
- Context.Clear();
- end;
- procedure TUCA_LineRec.Assign(ASource : PUCA_LineRec);
- begin
- if (ASource = nil) then begin
- Clear();
- exit;
- end;
- Self.CodePoints := Copy(ASource^.CodePoints);
- Self.Weights := Copy(ASource^.Weights);
- Self.Deleted := ASource^.Deleted;
- Self.Stored := ASource^.Stored;
- Self.Context.Assign(@ASource^.Context);
- end;
- function TUCA_LineRec.Clone : TUCA_LineRec;
- begin
- Result.Clear();
- Result.Assign(@Self);
- end;
- function TUCA_LineRec.HasContext() : Boolean;
- begin
- Result := (Length(Context.Data) > 0);
- end;
- { TPropRec }
- function TPropRec.GetCategory: TUnicodeCategory;
- begin
- Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
- end;
- function TPropRec.GetUnifiedIdeograph : Boolean;
- begin
- Result := IsBitON(CategoryData,FLAG_UNIFIED_IDEOGRAPH);
- end;
- procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
- var
- b : Byte;
- begin
- b := Ord(AValue);
- b := b shl 3;
- CategoryData := CategoryData or b;
- //CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
- end;
- function TPropRec.GetWhiteSpace: Boolean;
- begin
- Result := IsBitON(CategoryData,FLAG_WHITE_SPACE);
- end;
- procedure TPropRec.SetUnifiedIdeograph(AValue : Boolean);
- begin
- SetBit(CategoryData,FLAG_UNIFIED_IDEOGRAPH,AValue);
- end;
- procedure TPropRec.SetWhiteSpace(AValue: Boolean);
- begin
- SetBit(CategoryData,FLAG_WHITE_SPACE,AValue);
- end;
- function TPropRec.GetHangulSyllable: Boolean;
- begin
- Result := IsBitON(CategoryData,FLAG_HANGUL_SYLLABLE);
- end;
- procedure TPropRec.SetHangulSyllable(AValue: Boolean);
- begin
- SetBit(CategoryData,FLAG_HANGUL_SYLLABLE,AValue);
- end;
- { TUCA_PropItemRec }
- function TUCA_PropItemRec.GetWeightSize : Word;
- var
- c : Integer;
- begin
- c := WeightLength;
- if (c = 0) then
- exit(0);
- Result := c*SizeOf(TUCA_PropWeights);
- if IsWeightCompress_1() then
- Result := Result - 1;
- if IsWeightCompress_2() then
- Result := Result - 1;
- end;
- function TUCA_PropItemRec.HasCodePoint(): Boolean;
- begin
- Result := IsBitON(Flags,FLAG_CODEPOINT);
- end;
- procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
- var
- c : Integer;
- p : PByte;
- pd : PUCA_PropWeights;
- begin
- c := WeightLength;
- p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
- pd := ADest;
- pd^.Weights[0] := PWord(p)^;
- p := p + 2;
- if not IsWeightCompress_1() then begin
- pd^.Weights[1] := PWord(p)^;
- p := p + 2;
- end else begin
- pd^.Weights[1] := p^;
- p := p + 1;
- end;
- if not IsWeightCompress_2() then begin
- pd^.Weights[2] := PWord(p)^;
- p := p + 2;
- end else begin
- pd^.Weights[2] := p^;
- p := p + 1;
- end;
- if (c > 1) then
- Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
- end;
- function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
- begin
- Result := SizeOf(TUCA_PropItemRec);
- if (WeightLength > 0) then begin
- Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
- if IsWeightCompress_1() then
- Result := Result - 1;
- if IsWeightCompress_2() then
- Result := Result - 1;
- end;
- if HasCodePoint() then
- Result := Result + SizeOf(UInt24);
- if Contextual then
- Result := Result + Cardinal(GetContext()^.Size);
- end;
- procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
- begin
- SetBit(Flags,FLAG_CONTEXTUAL,AValue);
- end;
- function TUCA_PropItemRec.GetContextual : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_CONTEXTUAL);
- end;
- function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
- var
- p : PtrUInt;
- begin
- if not Contextual then
- exit(nil);
- p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
- if IsBitON(Flags,FLAG_CODEPOINT) then
- p := p + SizeOf(UInt24);
- Result := PUCA_PropItemContextTreeRec(p);
- end;
- procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
- begin
- SetBit(Flags,FLAG_DELETION,AValue);
- end;
- function TUCA_PropItemRec.IsDeleted: Boolean;
- begin
- Result := IsBitON(Flags,FLAG_DELETION);
- end;
- function TUCA_PropItemRec.IsValid() : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_VALID);
- end;
- function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
- end;
- function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
- begin
- Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
- end;
- function TUCA_PropItemRec.GetCodePoint: UInt24;
- begin
- if HasCodePoint() then begin
- if Contextual then
- Result := PUInt24(
- PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
- Cardinal(GetContext()^.Size)
- )^
- else
- Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
- end else begin
- raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
- end
- end;
- function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
- var
- a, b : PUCA_LineContextItemRec;
- i, hb : Integer;
- begin
- if (Item1 = Item2) then
- exit(0);
- if (Item1 = nil) then
- exit(-1);
- if (Item2 = nil) then
- exit(1);
- a := Item1;
- b := Item2;
- if (a^.CodePoints = b^.CodePoints) then
- exit(0);
- Result := 1;
- hb := Length(b^.CodePoints) - 1;
- for i := 0 to Length(a^.CodePoints) - 1 do begin
- if (i > hb) then
- exit;
- if (a^.CodePoints[i] < b^.CodePoints[i]) then
- exit(-1);
- if (a^.CodePoints[i] > b^.CodePoints[i]) then
- exit(1);
- end;
- if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
- exit(0);
- exit(-1);
- end;
- function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
- var
- r : TAVLTree;
- i : Integer;
- begin
- r := TAVLTree.Create(@avl_CompareCodePoints);
- try
- for i := 0 to Length(AContext^.Data) - 1 do
- r.Add(@AContext^.Data[i]);
- Result := r;
- except
- FreeAndNil(r);
- raise;
- end;
- end;
- function ConstructContextTree(
- const AContext : PUCA_LineContextRec;
- var ADestBuffer;
- const ADestBufferLength : Cardinal
- ) : PUCA_PropItemContextTreeRec;
- function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
- var
- kitem : PUCA_LineContextItemRec;
- begin
- if (AItem = nil) then
- exit(0);
- kitem := AItem.Data;
- Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
- SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
- SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
- (Length(kitem^.CodePoints)*SizeOf(UInt24)) +
- SizeOf(PUCA_PropItemContextRec^.WeightCount) +
- (Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
- end;
- function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
- begin
- if (AItem = nil) then
- exit(0);
- Result := CalcItemOnlySize(AItem);
- if (AItem.Left <> nil) then
- Result := Result + CalcItemSize(AItem.Left);
- if (AItem.Right <> nil) then
- Result := Result + CalcItemSize(AItem.Right);
- end;
- function CalcSize(AData : TAVLTree) : Cardinal;
- begin
- Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
- end;
- function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
- var
- k : Integer;
- kitem : PUCA_LineContextItemRec;
- kpcp : PUInt24;
- kpw : PUCA_PropWeights;
- pextra : PtrUInt;
- pnext : PUCA_PropItemContextTreeNodeRec;
- begin
- kitem := ASource.Data;
- ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
- ADest^.Data.WeightCount := Length(kitem^.Weights);
- pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
- SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
- if (ADest^.Data.CodePointCount > 0) then begin
- kpcp := PUInt24(pextra);
- for k := 0 to ADest^.Data.CodePointCount - 1 do begin
- kpcp^ := kitem^.CodePoints[k];
- Inc(kpcp);
- end;
- end;
- if (ADest^.Data.WeightCount > 0) then begin
- kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
- for k := 0 to ADest^.Data.WeightCount - 1 do begin
- kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
- kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
- kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
- Inc(kpw);
- end;
- end;
- Result := CalcItemOnlySize(ASource);
- if (ASource.Left <> nil) then begin
- pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
- ADest^.Left := Result;
- Result := Result + ConstructItem(ASource.Left,pnext);
- end else begin
- ADest^.Left := 0;
- end;
- if (ASource.Right <> nil) then begin
- pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
- ADest^.Right := Result;
- Result := Result + ConstructItem(ASource.Right,pnext);
- end else begin
- ADest^.Right := 0;
- end;
- end;
- var
- c : PtrUInt;
- r : PUCA_PropItemContextTreeRec;
- p : PUCA_PropItemContextTreeNodeRec;
- tempTree : TAVLTree;
- begin
- tempTree := ConstructAvlContextTree(AContext);
- try
- c := CalcSize(tempTree);
- if (ADestBufferLength > 0) and (c > ADestBufferLength) then
- raise Exception.Create(SInsufficientMemoryBuffer);
- r := @ADestBuffer;
- r^.Size := c;
- p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
- ConstructItem(tempTree.Root,p);
- finally
- tempTree.Free();
- end;
- Result := r;
- end;
- procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
- begin
- ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
- ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
- ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
- ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
- ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
- ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
- ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
- end;
- procedure ReverseBytes(var AData; const ALength : Integer);
- var
- i,j : PtrInt;
- c : Byte;
- p : PByte;
- begin
- if (ALength = 1) then
- exit;
- p := @AData;
- j := ALength div 2;
- for i := 0 to Pred(j) do begin
- c := p[i];
- p[i] := p[(ALength - 1 ) - i];
- p[(ALength - 1 ) - i] := c;
- end;
- end;
- procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
- var
- p : PByte;
- i : PtrInt;
- begin
- if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
- p := @AValue;
- for i := 0 to Pred(AArrayLength) do begin
- ReverseBytes(p^,AItemSize);
- Inc(p,AItemSize);
- end;
- end;
- end;
- procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
- var
- k : PtrUInt;
- p_s, p_d : PByte;
- begin
- d^.Left := s^.Left;
- ReverseBytes(d^.Left,SizeOf(d^.Left));
- d^.Right := s^.Right;
- ReverseBytes(d^.Right,SizeOf(d^.Right));
- d^.Data.CodePointCount := s^.Data.CodePointCount;
- ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
- d^.Data.WeightCount := s^.Data.WeightCount;
- ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
- k := SizeOf(TUCA_PropItemContextTreeNodeRec);
- p_s := PByte(PtrUInt(s) + k);
- p_d := PByte(PtrUInt(d) + k);
- k := (s^.Data.CodePointCount*SizeOf(UInt24));
- Move(p_s^,p_d^, k);
- ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
- p_s := PByte(PtrUInt(p_s) + k);
- p_d := PByte(PtrUInt(p_d) + k);
- k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
- Move(p_s^,p_d^,k);
- ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
- if (s^.Left > 0) then
- ReverseContextNodeFromNativeEndian(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
- );
- if (s^.Right > 0) then
- ReverseContextNodeFromNativeEndian(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
- );
- end;
- procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
- var
- k : PtrUInt;
- begin
- d^.Size := s^.Size;
- ReverseBytes(d^.Size,SizeOf(d^.Size));
- if (s^.Size = 0) then
- exit;
- k := SizeOf(s^.Size);
- ReverseContextNodeFromNativeEndian(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
- );
- end;
- procedure ReverseFromNativeEndian(
- const AData : PUCA_PropItemRec;
- const ADataLen : Cardinal;
- const ADest : PUCA_PropItemRec
- );
- var
- s, d : PUCA_PropItemRec;
- sCtx, dCtx : PUCA_PropItemContextTreeRec;
- dataEnd : PtrUInt;
- k, i : PtrUInt;
- p_s, p_d : PByte;
- pw_s, pw_d : PUCA_PropWeights;
- begin
- dataEnd := PtrUInt(AData) + ADataLen;
- s := AData;
- d := ADest;
- while True do begin
- d^.WeightLength := s^.WeightLength;
- ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
- d^.ChildCount := s^.ChildCount;
- ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
- d^.Size := s^.Size;
- ReverseBytes(d^.Size,SizeOf(d^.Size));
- d^.Flags := s^.Flags;
- ReverseBytes(d^.Flags,SizeOf(d^.Flags));
- if s^.Contextual then begin
- k := SizeOf(TUCA_PropItemRec);
- if s^.HasCodePoint() then
- k := k + SizeOf(UInt24);
- sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
- dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
- ReverseContextFromNativeEndian(sCtx,dCtx);
- end;
- if s^.HasCodePoint() then begin
- if s^.Contextual then
- k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
- else
- k := s^.GetSelfOnlySize() - SizeOf(UInt24);
- p_s := PByte(PtrUInt(s) + k);
- p_d := PByte(PtrUInt(d) + k);
- Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
- ReverseBytes(p_d^,SizeOf(UInt24));
- end;
- if (s^.WeightLength > 0) then begin
- k := SizeOf(TUCA_PropItemRec);
- p_s := PByte(PtrUInt(s) + k);
- p_d := PByte(PtrUInt(d) + k);
- k := SizeOf(Word);
- Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
- ReverseBytes(Unaligned(p_d^),k);
- p_s := PByte(PtrUInt(p_s) + k);
- p_d := PByte(PtrUInt(p_d) + k);
- if s^.IsWeightCompress_1() then begin
- k := SizeOf(Byte);
- PByte(p_d)^ := PByte(p_s)^;
- end else begin
- k := SizeOf(Word);
- Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
- end;
- ReverseBytes(p_d^,k);
- p_s := PByte(PtrUInt(p_s) + k);
- p_d := PByte(PtrUInt(p_d) + k);
- if s^.IsWeightCompress_2() then begin
- k := SizeOf(Byte);
- PByte(p_d)^ := PByte(p_s)^;
- end else begin
- k := SizeOf(Word);
- Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
- end;
- ReverseBytes(p_d^,k);
- if (s^.WeightLength > 1) then begin
- pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
- pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
- for i := 1 to s^.WeightLength - 1 do begin
- pw_d^.Weights[0] := pw_s^.Weights[0];
- pw_d^.Weights[1] := pw_s^.Weights[1];
- pw_d^.Weights[2] := pw_s^.Weights[2];
- ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
- Inc(pw_s);
- Inc(pw_d);
- end;
- end;
- end;
- k := s^.GetSelfOnlySize();
- s := PUCA_PropItemRec(PtrUInt(s)+k);
- d := PUCA_PropItemRec(PtrUInt(d)+k);
- if (PtrUInt(s) >= dataEnd) then
- Break;
- end;
- if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
- raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
- end;
- //------------------------------------------------------------------------------
- procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
- var
- k : PtrUInt;
- p_s, p_d : PByte;
- begin
- d^.Left := s^.Left;
- ReverseBytes(d^.Left,SizeOf(d^.Left));
- d^.Right := s^.Right;
- ReverseBytes(d^.Right,SizeOf(d^.Right));
- d^.Data.CodePointCount := s^.Data.CodePointCount;
- ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
- d^.Data.WeightCount := s^.Data.WeightCount;
- ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
- k := SizeOf(TUCA_PropItemContextTreeNodeRec);
- p_s := PByte(PtrUInt(s) + k);
- p_d := PByte(PtrUInt(d) + k);
- k := (d^.Data.CodePointCount*SizeOf(UInt24));
- Move(p_s^,p_d^, k);
- ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
- p_s := PByte(PtrUInt(p_s) + k);
- p_d := PByte(PtrUInt(p_d) + k);
- k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
- Move(p_s^,p_d^,k);
- ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
- if (d^.Left > 0) then
- ReverseContextNodeToNativeEndian(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
- );
- if (d^.Right > 0) then
- ReverseContextNodeToNativeEndian(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
- );
- end;
- procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
- var
- k : PtrUInt;
- begin
- d^.Size := s^.Size;
- ReverseBytes(d^.Size,SizeOf(d^.Size));
- if (s^.Size = 0) then
- exit;
- k := SizeOf(s^.Size);
- ReverseContextNodeToNativeEndian(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
- );
- end;
- procedure ReverseToNativeEndian(
- const AData : PUCA_PropItemRec;
- const ADataLen : Cardinal;
- const ADest : PUCA_PropItemRec
- );
- var
- s, d : PUCA_PropItemRec;
- sCtx, dCtx : PUCA_PropItemContextTreeRec;
- dataEnd : PtrUInt;
- k, i : PtrUInt;
- p_s, p_d : PByte;
- pw_s, pw_d : PUCA_PropWeights;
- begin
- dataEnd := PtrUInt(AData) + ADataLen;
- s := AData;
- d := ADest;
- while True do begin
- d^.WeightLength := s^.WeightLength;
- ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
- d^.ChildCount := s^.ChildCount;
- ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
- d^.Size := s^.Size;
- ReverseBytes(d^.Size,SizeOf(d^.Size));
- d^.Flags := s^.Flags;
- ReverseBytes(d^.Flags,SizeOf(d^.Flags));
- if d^.Contextual then begin
- k := SizeOf(TUCA_PropItemRec);
- if d^.HasCodePoint() then
- k := k + SizeOf(UInt24);
- sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
- dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
- ReverseContextToNativeEndian(sCtx,dCtx);
- end;
- if d^.HasCodePoint() then begin
- if d^.Contextual then
- k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
- else
- k := d^.GetSelfOnlySize() - SizeOf(UInt24);
- p_s := PByte(PtrUInt(s) + k);
- p_d := PByte(PtrUInt(d) + k);
- Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
- ReverseBytes(p_d^,SizeOf(UInt24));
- end;
- if (d^.WeightLength > 0) then begin
- k := SizeOf(TUCA_PropItemRec);
- p_s := PByte(PtrUInt(s) + k);
- p_d := PByte(PtrUInt(d) + k);
- k := SizeOf(Word);
- Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
- ReverseBytes(p_d^,k);
- p_s := PByte(PtrUInt(p_s) + k);
- p_d := PByte(PtrUInt(p_d) + k);
- if d^.IsWeightCompress_1() then begin
- k := SizeOf(Byte);
- PByte(p_d)^ := PByte(p_s)^;
- end else begin
- k := SizeOf(Word);
- Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
- end;
- ReverseBytes(p_d^,k);
- p_s := PByte(PtrUInt(p_s) + k);
- p_d := PByte(PtrUInt(p_d) + k);
- if d^.IsWeightCompress_2() then begin
- k := SizeOf(Byte);
- PByte(p_d)^ := PByte(p_s)^;
- end else begin
- k := SizeOf(Word);
- Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
- end;
- ReverseBytes(p_d^,k);
- if (d^.WeightLength > 1) then begin
- pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
- pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
- for i := 1 to d^.WeightLength - 1 do begin
- pw_d^.Weights[0] := pw_s^.Weights[0];
- pw_d^.Weights[1] := pw_s^.Weights[1];
- pw_d^.Weights[2] := pw_s^.Weights[2];
- ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
- Inc(pw_s);
- Inc(pw_d);
- end;
- end;
- end;
- k := d^.GetSelfOnlySize();
- s := PUCA_PropItemRec(PtrUInt(s)+k);
- d := PUCA_PropItemRec(PtrUInt(d)+k);
- if (PtrUInt(s) >= dataEnd) then
- Break;
- end;
- if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
- raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
- end;
- procedure Check(const ACondition : Boolean; const AMsg : string);overload;
- begin
- if not ACondition then
- raise Exception.Create(AMsg);
- end;
- procedure Check(
- const ACondition : Boolean;
- const AFormatMsg : string;
- const AArgs : array of const
- );overload;
- begin
- Check(ACondition,Format(AFormatMsg,AArgs));
- end;
- procedure Check(const ACondition : Boolean);overload;
- begin
- Check(ACondition,'Check failed.')
- end;
- procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
- var
- i : Integer;
- begin
- if (ALength > 0) then begin
- for i := 0 to ALength - 1 do begin
- Check(a[i].Weights[0]=b[i].Weights[0]);
- Check(a[i].Weights[1]=b[i].Weights[1]);
- Check(a[i].Weights[2]=b[i].Weights[2]);
- end;
- end;
- end;
- procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
- var
- i : Integer;
- begin
- if (ALength > 0) then begin
- for i := 0 to ALength - 1 do
- Check(a[i]=b[i]);
- end;
- end;
- procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
- var
- a, b : PUCA_PropItemContextTreeNodeRec;
- k : Cardinal;
- begin
- if (AProp1=nil) then begin
- Check(AProp2=nil);
- exit;
- end;
- a := AProp1;
- b := AProp2;
- Check(a^.Left=b^.Left);
- Check(a^.Right=b^.Right);
- Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
- Check(a^.Data.WeightCount=b^.Data.WeightCount);
- k := SizeOf(a^.Data);
- CompareCodePoints(
- PUInt24(PtrUInt(a)+k),
- PUInt24(PtrUInt(b)+k),
- a^.Data.CodePointCount
- );
- k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
- CompareWeights(
- PUCA_PropWeights(PtrUInt(a)+k),
- PUCA_PropWeights(PtrUInt(b)+k),
- a^.Data.WeightCount
- );
- if (a^.Left > 0) then begin
- k := a^.Left;
- CompareContextNode(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
- );
- end;
- if (a^.Right > 0) then begin
- k := a^.Right;
- CompareContextNode(
- PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
- PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
- );
- end;
- end;
- procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
- var
- a, b : PUCA_PropItemContextTreeNodeRec;
- k : Integer;
- begin
- if (AProp1=nil) then begin
- Check(AProp2=nil);
- exit;
- end;
- Check(AProp1^.Size=AProp2^.Size);
- k := Cardinal(AProp1^.Size);
- a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
- b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
- CompareContextNode(a,b);
- end;
- procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
- var
- a, b, pend : PUCA_PropItemRec;
- wa, wb : array of TUCA_PropWeights;
- k : Integer;
- begin
- if (ADataLen <= 0) then
- exit;
- a := PUCA_PropItemRec(AProp1);
- b := PUCA_PropItemRec(AProp2);
- pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
- while (a<pend) do begin
- Check(a^.WeightLength=b^.WeightLength);
- Check(a^.ChildCount=b^.ChildCount);
- Check(a^.Size=b^.Size);
- Check(a^.Flags=b^.Flags);
- if a^.HasCodePoint() then
- Check(a^.CodePoint = b^.CodePoint);
- if (a^.WeightLength > 0) then begin
- k := a^.WeightLength;
- SetLength(wa,k);
- SetLength(wb,k);
- a^.GetWeightArray(@wa[0]);
- b^.GetWeightArray(@wb[0]);
- CompareWeights(@wa[0],@wb[0],k);
- end;
- if a^.Contextual then
- CompareContext(a^.GetContext(),b^.GetContext());
- Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
- k := a^.GetSelfOnlySize();
- a := PUCA_PropItemRec(PtrUInt(a)+k);
- b := PUCA_PropItemRec(PtrUInt(b)+k);
- end;
- end;
- Procedure QuickSort(AList : PCardinal; L, R : Longint);overload;
- var
- I, J : Longint;
- P, Q : Cardinal;
- begin
- repeat
- I := L;
- J := R;
- P := AList[ (L + R) div 2 ];
- repeat
- while (P > AList[i]) do
- I := I + 1;
- while (P < AList[J]) do
- J := J - 1;
- If I <= J then
- begin
- Q := AList[I];
- AList[I] := AList[J];
- AList[J] := Q;
- I := I + 1;
- J := J - 1;
- end;
- until I > J;
- if J - L < R - I then
- begin
- if L < J then
- QuickSort(AList, L, J);
- L := I;
- end
- else
- begin
- if I < R then
- QuickSort(AList, I, R);
- R := J;
- end;
- until L >= R;
- end;
- function CalcMaxLevel2Count(
- const ALevel1Value : Cardinal;
- ALines : array of TUCA_LineRec
- ) : Integer;
- var
- i, c, k : Integer;
- ac : Integer;
- items : array of Cardinal;
- p : PUCA_LineRec;
- pw : ^TUCA_WeightRec;
- begin
- c := Length(ALines);
- if (c < 1) then
- exit(0);
- SetLength(items,0);
- ac := 0;
- p := @ALines[Low(ALines)];
- for i := 0 to c-1 do begin
- if (Length(p^.Weights) > 0) then begin
- pw := @p^.Weights[Low(p^.Weights)];
- for k := 0 to Length(p^.Weights)-1 do begin
- if (pw^.Weights[0] = ALevel1Value) then begin
- if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
- if (ac >= Length(items)) then
- SetLength(items,Length(items)+256);
- items[ac] := pw^.Weights[1];
- ac := ac+1;
- end;
- end;
- Inc(pw);
- end;
- end;
- Inc(p);
- end;
- Result := ac;
- end;
- function RewriteLevel2(
- const ALevel1Value : Cardinal;
- ALines : PUCA_LineRec;
- const ALinesLength : Integer
- ) : Integer;
- var
- i, c, k : Integer;
- ac : Integer;
- items : array of Cardinal;
- p : PUCA_LineRec;
- pw : ^TUCA_WeightRec;
- newValue : Int64;
- begin
- c := ALinesLength;
- if (c < 1) then
- exit(0);
- SetLength(items,256);
- ac := 0;
- p := ALines;
- for i := 0 to c-1 do begin
- if (Length(p^.Weights) > 0) then begin
- for k := 0 to Length(p^.Weights)-1 do begin
- pw := @p^.Weights[k];
- if (pw^.Weights[0] = ALevel1Value) then begin
- if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
- if (ac >= Length(items)) then
- SetLength(items,Length(items)+256);
- items[ac] := pw^.Weights[1];
- ac := ac+1;
- end;
- end;
- end;
- end;
- Inc(p);
- end;
- SetLength(items,ac);
- if (ac > 1) then
- QuickSort(@items[0],0,(ac-1));
- p := ALines;
- for i := 0 to c-1 do begin
- if (Length(p^.Weights) > 0) then begin
- for k := 0 to Length(p^.Weights)-1 do begin
- pw := @p^.Weights[k];
- if (pw^.Weights[0] = ALevel1Value) then begin
- newValue := IndexDWord(items[0],ac,pw^.Weights[1]);
- if (newValue < 0) then
- raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]);
- pw^.Weights[1] := newValue;//+1;
- end;
- end;
- end;
- Inc(p);
- end;
- if (Length(items) > 0) then
- Result := items[Length(items)-1]
- else
- Result := 0;
- end;
- procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
- var
- c, i, ac, k : Integer;
- p : PUCA_LineRec;
- level1List : array of Cardinal;
- pw : ^TUCA_WeightRec;
- begin
- c := ALength;
- if (c < 1) then
- exit;
- ac := 0;
- SetLength(level1List,c);
- p := ALines;
- for i := 0 to c-1 do begin
- if (Length(p^.Weights) > 0) then begin
- for k := 0 to Length(p^.Weights)-1 do begin
- pw := @p^.Weights[k];
- if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin
- if (ac >= Length(level1List)) then
- SetLength(level1List,ac+1000);
- level1List[ac] := pw^.Weights[0];
- RewriteLevel2(level1List[ac],ALines,ALength);
- ac := ac+1;
- end;
- end;
- end;
- Inc(p);
- end;
- end;
- function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
- var
- i, c, k, tempValue : Integer;
- p : PUCA_LineRec;
- maxLevel : Cardinal;
- maxValue : Integer;
- begin
- c := Length(ALines);
- if (c < 2) then
- exit(0);
- maxLevel := 0;
- maxValue := CalcMaxLevel2Count(maxLevel,ALines);
- p := @ALines[Low(ALines)+1];
- for i := 1 to c-1 do begin
- if (Length(p^.Weights) > 0) then begin
- for k := 0 to Length(p^.Weights)-1 do begin
- if (p^.Weights[k].Weights[0] <> maxLevel) then begin
- tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines);
- if (tempValue > maxValue) then begin
- maxLevel := p^.Weights[k].Weights[0];
- maxValue := tempValue;
- end;
- end;
- end;
- end;
- Inc(p);
- end;
- Result := maxValue;
- end;
- initialization
- FS := DefaultFormatSettings;
- FS.DecimalSeparator := '.';
- end.
|