helper.pas 134 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680
  1. { Unicode parser helper unit.
  2. Copyright (c) 2012 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
  15. unit helper;
  16. {$mode delphi}
  17. {$H+}
  18. {$PACKENUM 1}
  19. {$pointermath on}
  20. {$typedaddress on}
  21. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  22. {$macro on}
  23. {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
  24. {$define X_PACKED:=}
  25. {$else FPC_REQUIRES_PROPER_ALIGNMENT}
  26. {$define X_PACKED:=packed}
  27. {$endif FPC_REQUIRES_PROPER_ALIGNMENT}
  28. interface
  29. uses
  30. Classes, SysUtils, StrUtils;
  31. const
  32. SLicenseText =
  33. ' { Unicode implementation tables. ' + sLineBreak +
  34. ' ' + sLineBreak +
  35. ' Copyright (c) 2013 by Inoussa OUEDRAOGO ' + sLineBreak +
  36. ' ' + sLineBreak +
  37. ' Permission is hereby granted, free of charge, to any person ' + sLineBreak +
  38. ' obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
  39. ' documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
  40. ' associated documentation (the "Software") to deal in the Data ' + sLineBreak +
  41. ' Files or Software without restriction, including without ' + sLineBreak +
  42. ' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
  43. ' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
  44. ' and to permit persons to whom the Data Files or Software are ' + sLineBreak +
  45. ' furnished to do so, provided that (a) the above copyright ' + sLineBreak +
  46. ' notice(s) and this permission notice appear with all copies ' + sLineBreak +
  47. ' of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
  48. ' notice(s) and this permission notice appear in associated ' + sLineBreak +
  49. ' documentation, and (c) there is clear notice in each modified ' + sLineBreak +
  50. ' Data File or in the Software as well as in the documentation ' + sLineBreak +
  51. ' associated with the Data File(s) or Software that the data or ' + sLineBreak +
  52. ' software has been modified. ' + sLineBreak +
  53. ' ' + sLineBreak +
  54. ' ' + sLineBreak +
  55. ' This program is distributed in the hope that it will be useful, ' + sLineBreak +
  56. ' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
  57. ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
  58. type
  59. // Unicode General Category
  60. TUnicodeCategory = (
  61. ucUppercaseLetter, // Lu = Letter, uppercase
  62. ucLowercaseLetter, // Ll = Letter, lowercase
  63. ucTitlecaseLetter, // Lt = Letter, titlecase
  64. ucModifierLetter, // Lm = Letter, modifier
  65. ucOtherLetter, // Lo = Letter, other
  66. ucNonSpacingMark, // Mn = Mark, nonspacing
  67. ucCombiningMark, // Mc = Mark, spacing combining
  68. ucEnclosingMark, // Me = Mark, enclosing
  69. ucDecimalNumber, // Nd = Number, decimal digit
  70. ucLetterNumber, // Nl = Number, letter
  71. ucOtherNumber, // No = Number, other
  72. ucConnectPunctuation, // Pc = Punctuation, connector
  73. ucDashPunctuation, // Pd = Punctuation, dash
  74. ucOpenPunctuation, // Ps = Punctuation, open
  75. ucClosePunctuation, // Pe = Punctuation, close
  76. ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
  77. ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
  78. ucOtherPunctuation, // Po = Punctuation, other
  79. ucMathSymbol, // Sm = Symbol, math
  80. ucCurrencySymbol, // Sc = Symbol, currency
  81. ucModifierSymbol, // Sk = Symbol, modifier
  82. ucOtherSymbol, // So = Symbol, other
  83. ucSpaceSeparator, // Zs = Separator, space
  84. ucLineSeparator, // Zl = Separator, line
  85. ucParagraphSeparator, // Zp = Separator, paragraph
  86. ucControl, // Cc = Other, control
  87. ucFormat, // Cf = Other, format
  88. ucSurrogate, // Cs = Other, surrogate
  89. ucPrivateUse, // Co = Other, private use
  90. ucUnassigned // Cn = Other, not assigned (including noncharacters)
  91. );
  92. TUInt24Rec = packed record
  93. public
  94. {$ifdef FPC_LITTLE_ENDIAN}
  95. byte0, byte1, byte2 : Byte;
  96. {$else FPC_LITTLE_ENDIAN}
  97. byte2, byte1, byte0 : Byte;
  98. {$endif FPC_LITTLE_ENDIAN}
  99. public
  100. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  101. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  102. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  103. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  104. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  105. class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  106. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  107. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  108. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  109. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  110. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  111. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  112. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  113. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  114. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  115. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  116. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  117. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  118. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  119. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  120. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  121. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  122. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  123. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  124. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  125. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  126. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  127. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  128. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  129. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  130. end;
  131. UInt24 = TUInt24Rec;
  132. PUInt24 = ^UInt24;
  133. TUnicodeCodePoint = Cardinal;
  134. TUnicodeCodePointArray = array of TUnicodeCodePoint;
  135. TDecompositionArray = array of TUnicodeCodePointArray;
  136. TNumericValue = Double;
  137. TNumericValueArray = array of TNumericValue;
  138. TBlockItemRec = packed record
  139. RangeStart : TUnicodeCodePoint;
  140. RangeEnd : TUnicodeCodePoint;
  141. Name : string[120];
  142. CanonicalName : string[120];
  143. end;
  144. TBlocks = array of TBlockItemRec;
  145. PPropRec = ^TPropRec;
  146. { TPropRec }
  147. TPropRec = packed record
  148. private
  149. function GetCategory : TUnicodeCategory;inline;
  150. procedure SetCategory(AValue : TUnicodeCategory);
  151. function GetWhiteSpace : Boolean;inline;
  152. procedure SetWhiteSpace(AValue : Boolean);
  153. function GetHangulSyllable : Boolean;inline;
  154. procedure SetHangulSyllable(AValue : Boolean);
  155. public
  156. CategoryData : Byte;
  157. PropID : Word;
  158. CCC : Byte; // Canonical Combining Class
  159. NumericIndex : Byte;
  160. SimpleUpperCase : UInt24;
  161. SimpleLowerCase : UInt24;
  162. DecompositionID : SmallInt;
  163. public
  164. property Category : TUnicodeCategory read GetCategory write SetCategory;
  165. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  166. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  167. end;
  168. TPropRecArray = array of TPropRec;
  169. TDecompositionIndexRec = packed record
  170. StartPosition : Word;
  171. Length : Byte;
  172. end;
  173. TDecompositionBook = X_PACKED record
  174. Index : array of TDecompositionIndexRec;
  175. CodePoints : array of TUnicodeCodePoint;
  176. end;
  177. PDataLineRec = ^TDataLineRec;
  178. TDataLineRec = record
  179. PropID : Integer;
  180. case LineType : Byte of
  181. 0 : (CodePoint : TUnicodeCodePoint);
  182. 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
  183. end;
  184. TDataLineRecArray = array of TDataLineRec;
  185. TCodePointRec = record
  186. case LineType : Byte of
  187. 0 : (CodePoint : TUnicodeCodePoint);
  188. 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
  189. end;
  190. TCodePointRecArray = array of TCodePointRec;
  191. TPropListLineRec = packed record
  192. CodePoint : TCodePointRec;
  193. PropName : string[123];
  194. end;
  195. TPropListLineRecArray = array of TPropListLineRec;
  196. TUCA_WeightRec = packed record
  197. Weights : array[0..3] of Cardinal;
  198. Variable : Boolean;
  199. end;
  200. TUCA_WeightRecArray = array of TUCA_WeightRec;
  201. TUCA_LineContextItemRec = X_PACKED record
  202. public
  203. CodePoints : TUnicodeCodePointArray;
  204. Weights : TUCA_WeightRecArray;
  205. public
  206. procedure Clear();
  207. procedure Assign(ASource : TUCA_LineContextItemRec);
  208. function Clone() : TUCA_LineContextItemRec;
  209. end;
  210. PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
  211. TUCA_LineContextRec = X_PACKED record
  212. public
  213. Data : array of TUCA_LineContextItemRec;
  214. public
  215. procedure Clear();
  216. procedure Assign(ASource : TUCA_LineContextRec);
  217. function Clone() : TUCA_LineContextRec;
  218. end;
  219. PUCA_LineContextRec = ^TUCA_LineContextRec;
  220. { TUCA_LineRec }
  221. TUCA_LineRec = X_PACKED record
  222. public
  223. CodePoints : TUnicodeCodePointArray;
  224. Weights : TUCA_WeightRecArray;
  225. Context : TUCA_LineContextRec;
  226. //Variable : Boolean;
  227. Deleted : Boolean;
  228. Stored : Boolean;
  229. public
  230. procedure Clear();
  231. procedure Assign(ASource : TUCA_LineRec);
  232. function Clone() : TUCA_LineRec;
  233. function HasContext() : Boolean;
  234. end;
  235. PUCA_LineRec = ^TUCA_LineRec;
  236. TUCA_VariableKind = (
  237. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  238. ucaIgnoreSP
  239. );
  240. TUCA_DataBook = X_PACKED record
  241. Version : string;
  242. VariableWeight : TUCA_VariableKind;
  243. Backwards : array[0..3] of Boolean;
  244. Lines : array of TUCA_LineRec;
  245. end;
  246. PUCA_DataBook = ^TUCA_DataBook;
  247. TUCA_DataBookIndex = array of Integer;
  248. type
  249. TUCA_PropWeights = packed record
  250. Weights : array[0..2] of Word;
  251. //Variable : Byte;
  252. end;
  253. PUCA_PropWeights = ^TUCA_PropWeights;
  254. TUCA_PropItemContextRec = packed record
  255. CodePointCount : Byte;
  256. WeightCount : Byte;
  257. //CodePoints : UInt24;
  258. //Weights : TUCA_PropWeights;
  259. end;
  260. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  261. TUCA_PropItemContextTreeNodeRec = packed record
  262. Left : Word;
  263. Right : Word;
  264. Data : TUCA_PropItemContextRec;
  265. end;
  266. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  267. TUCA_PropItemContextTreeRec = packed record
  268. public
  269. Size : UInt24;
  270. public
  271. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  272. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  273. end;
  274. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  275. { TUCA_PropItemRec }
  276. TUCA_PropItemRec = packed record
  277. private
  278. const FLAG_VALID = 0;
  279. const FLAG_CODEPOINT = 1;
  280. const FLAG_CONTEXTUAL = 2;
  281. const FLAG_DELETION = 3;
  282. const FLAG_COMPRESS_WEIGHT_1 = 6;
  283. const FLAG_COMPRESS_WEIGHT_2 = 7;
  284. private
  285. function GetWeightSize : Word;inline;
  286. public
  287. WeightLength : Byte;
  288. ChildCount : Byte;
  289. Size : Word;
  290. Flags : Byte;
  291. public
  292. function HasCodePoint() : Boolean;inline;
  293. function GetCodePoint() : UInt24;//inline;
  294. property CodePoint : UInt24 read GetCodePoint;
  295. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  296. procedure GetWeightArray(ADest : PUCA_PropWeights);
  297. function GetSelfOnlySize() : Cardinal;inline;
  298. procedure SetContextual(AValue : Boolean);inline;
  299. function GetContextual() : Boolean;inline;
  300. property Contextual : Boolean read GetContextual write setContextual;
  301. function GetContext() : PUCA_PropItemContextTreeRec;
  302. procedure SetDeleted(AValue : Boolean);inline;
  303. function IsDeleted() : Boolean;inline;
  304. function IsValid() : Boolean;inline;
  305. function IsWeightCompress_1() : Boolean;inline;
  306. function IsWeightCompress_2() : Boolean;inline;
  307. end;
  308. PUCA_PropItemRec = ^TUCA_PropItemRec;
  309. TUCA_PropIndexItem = packed record
  310. CodePoint : Cardinal;
  311. Position : Integer;
  312. end;
  313. PUCA_PropIndexItem = ^TUCA_PropIndexItem;
  314. TUCA_PropBook = X_PACKED record
  315. ItemSize : Integer;
  316. Index : array of TUCA_PropIndexItem;
  317. Items : PUCA_PropItemRec; //Native Endian
  318. ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian
  319. VariableLowLimit : Word;
  320. VariableHighLimit : Word;
  321. end;
  322. PUCA_PropBook = ^TUCA_PropBook;
  323. TBmpFirstTable = array[0..255] of Byte;
  324. TBmpSecondTableItem = array[0..255] of Word;
  325. TBmpSecondTable = array of TBmpSecondTableItem;
  326. T3lvlBmp1Table = array[0..255] of Byte;
  327. T3lvlBmp2TableItem = array[0..15] of Word;
  328. T3lvlBmp2Table = array of T3lvlBmp2TableItem;
  329. T3lvlBmp3TableItem = array[0..15] of Word;
  330. T3lvlBmp3Table = array of T3lvlBmp3TableItem;
  331. TucaBmpFirstTable = array[0..255] of Byte;
  332. TucaBmpSecondTableItem = array[0..255] of Cardinal;
  333. TucaBmpSecondTable = array of TucaBmpSecondTableItem;
  334. PucaBmpFirstTable = ^TucaBmpFirstTable;
  335. PucaBmpSecondTable = ^TucaBmpSecondTable;
  336. const
  337. LOW_SURROGATE_BEGIN = Word($DC00);
  338. LOW_SURROGATE_END = Word($DFFF);
  339. LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
  340. HIGH_SURROGATE_BEGIN = Word($D800);
  341. HIGH_SURROGATE_END = Word($DBFF);
  342. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  343. type
  344. TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
  345. TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
  346. TOBmpSecondTable = array of TOBmpSecondTableItem;
  347. T3lvlOBmp1Table = array[0..1023] of Byte;
  348. T3lvlOBmp2TableItem = array[0..31] of Word;
  349. T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
  350. T3lvlOBmp3TableItem = array[0..31] of Word;
  351. T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
  352. TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
  353. TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
  354. TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
  355. PucaOBmpFirstTable = ^TucaOBmpFirstTable;
  356. PucaOBmpSecondTable = ^TucaOBmpSecondTable;
  357. type
  358. TEndianKind = (ekLittle, ekBig);
  359. const
  360. ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
  361. {$IFDEF ENDIAN_LITTLE}
  362. ENDIAN_NATIVE = ekLittle;
  363. ENDIAN_NON_NATIVE = ekBig;
  364. {$ENDIF ENDIAN_LITTLE}
  365. {$IFDEF ENDIAN_BIG}
  366. ENDIAN_NATIVE = ekBig;
  367. ENDIAN_NON_NATIVE = ekLittle;
  368. {$ENDIF ENDIAN_BIG}
  369. procedure GenerateLicenceText(ADest : TStream);
  370. function BoolToByte(AValue : Boolean): Byte;inline;
  371. function IsHangulSyllable(
  372. const ACodePoint : TUnicodeCodePoint;
  373. const AHangulList : TCodePointRecArray
  374. ) : Boolean;
  375. procedure ParseHangulSyllableTypes(
  376. ADataAStream : TMemoryStream;
  377. var ACodePointList : TCodePointRecArray
  378. );
  379. procedure ParseProps(
  380. ADataAStream : TMemoryStream;
  381. var APropList : TPropListLineRecArray
  382. );
  383. function FindCodePointsByProperty(
  384. const APropName : string;
  385. const APropList : TPropListLineRecArray
  386. ) : TCodePointRecArray;
  387. procedure ParseBlokcs(
  388. ADataAStream : TMemoryStream;
  389. var ABlocks : TBlocks
  390. );
  391. procedure ParseUCAFile(
  392. ADataAStream : TMemoryStream;
  393. var ABook : TUCA_DataBook
  394. );
  395. procedure MakeUCA_Props(
  396. ABook : PUCA_DataBook;
  397. out AProps : PUCA_PropBook
  398. );
  399. procedure FreeUcaBook(var ABook : PUCA_PropBook);
  400. procedure MakeUCA_BmpTables(
  401. var AFirstTable : TucaBmpFirstTable;
  402. var ASecondTable : TucaBmpSecondTable;
  403. const APropBook : PUCA_PropBook
  404. );
  405. procedure MakeUCA_OBmpTables(
  406. var AFirstTable : TucaOBmpFirstTable;
  407. var ASecondTable : TucaOBmpSecondTable;
  408. const APropBook : PUCA_PropBook
  409. );
  410. function GetPropPosition(
  411. const AHighS,
  412. ALowS : Word;
  413. const AFirstTable : PucaOBmpFirstTable;
  414. const ASecondTable : PucaOBmpSecondTable
  415. ): Integer;inline;overload;
  416. procedure GenerateUCA_Head(
  417. ADest : TStream;
  418. ABook : PUCA_DataBook;
  419. AProps : PUCA_PropBook
  420. );
  421. procedure GenerateUCA_BmpTables(
  422. AStream,
  423. ANativeEndianStream,
  424. ANonNativeEndianStream : TStream;
  425. var AFirstTable : TucaBmpFirstTable;
  426. var ASecondTable : TucaBmpSecondTable
  427. );
  428. procedure GenerateBinaryUCA_BmpTables(
  429. ANativeEndianStream,
  430. ANonNativeEndianStream : TStream;
  431. var AFirstTable : TucaBmpFirstTable;
  432. var ASecondTable : TucaBmpSecondTable
  433. );
  434. procedure GenerateUCA_PropTable(
  435. ADest : TStream;
  436. const APropBook : PUCA_PropBook;
  437. const AEndian : TEndianKind
  438. );
  439. procedure GenerateBinaryUCA_PropTable(
  440. // WARNING : files must be generated for each endianess (Little / Big)
  441. ANativeEndianStream,
  442. ANonNativeEndianStream : TStream;
  443. const APropBook : PUCA_PropBook
  444. );
  445. procedure GenerateUCA_OBmpTables(
  446. AStream,
  447. ANativeEndianStream,
  448. ANonNativeEndianStream : TStream;
  449. var AFirstTable : TucaOBmpFirstTable;
  450. var ASecondTable : TucaOBmpSecondTable
  451. );
  452. procedure GenerateBinaryUCA_OBmpTables(
  453. ANativeEndianStream,
  454. ANonNativeEndianStream : TStream;
  455. var AFirstTable : TucaOBmpFirstTable;
  456. var ASecondTable : TucaOBmpSecondTable
  457. );
  458. procedure Parse_UnicodeData(
  459. ADataAStream : TMemoryStream;
  460. var APropList : TPropRecArray;
  461. var ANumericTable : TNumericValueArray;
  462. var ADataLineList : TDataLineRecArray;
  463. var ADecomposition : TDecompositionArray;
  464. const AHangulList : TCodePointRecArray;
  465. const AWhiteSpaces : TCodePointRecArray
  466. );
  467. procedure MakeDecomposition(
  468. const ARawData : TDecompositionArray;
  469. var ABook : TDecompositionBook
  470. );
  471. procedure MakeBmpTables(
  472. var AFirstTable : TBmpFirstTable;
  473. var ASecondTable : TBmpSecondTable;
  474. const ADataLineList : TDataLineRecArray
  475. );
  476. procedure MakeBmpTables3Levels(
  477. var AFirstTable : T3lvlBmp1Table;
  478. var ASecondTable : T3lvlBmp2Table;
  479. var AThirdTable : T3lvlBmp3Table;
  480. const ADataLineList : TDataLineRecArray
  481. );
  482. procedure GenerateBmpTables(
  483. ADest : TStream;
  484. var AFirstTable : TBmpFirstTable;
  485. var ASecondTable : TBmpSecondTable
  486. );
  487. procedure Generate3lvlBmpTables(
  488. ADest : TStream;
  489. var AFirstTable : T3lvlBmp1Table;
  490. var ASecondTable : T3lvlBmp2Table;
  491. var AThirdTable : T3lvlBmp3Table
  492. );
  493. procedure GeneratePropTable(
  494. ADest : TStream;
  495. const APropList : TPropRecArray;
  496. const AEndian : TEndianKind
  497. );
  498. procedure GenerateNumericTable(
  499. ADest : TStream;
  500. const ANumList : TNumericValueArray;
  501. const ACompleteUnit : Boolean
  502. );
  503. procedure GenerateDecompositionBookTable(
  504. ADest : TStream;
  505. const ABook : TDecompositionBook;
  506. const AEndian : TEndianKind
  507. );
  508. procedure GenerateOutBmpTable(
  509. ADest : TStream;
  510. const AList : TDataLineRecArray
  511. );
  512. function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
  513. function EvaluateFloat(const AStr : string) : Double;
  514. function StrToCategory(const AStr : string) : TUnicodeCategory;
  515. function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
  516. function IsWhiteSpace(
  517. const ACodePoint : TUnicodeCodePoint;
  518. const AWhiteSpaces : TCodePointRecArray
  519. ) : Boolean;
  520. function GetPropID(
  521. ACodePoint : TUnicodeCodePoint;
  522. const ADataLineList : TDataLineRecArray
  523. ) : Cardinal;
  524. //--------------------
  525. procedure MakeOBmpTables(
  526. var AFirstTable : TOBmpFirstTable;
  527. var ASecondTable : TOBmpSecondTable;
  528. const ADataLineList : TDataLineRecArray
  529. );
  530. procedure MakeOBmpTables3Levels(
  531. var AFirstTable : T3lvlOBmp1Table;
  532. var ASecondTable : T3lvlOBmp2Table;
  533. var AThirdTable : T3lvlOBmp3Table;
  534. const ADataLineList : TDataLineRecArray
  535. );
  536. procedure GenerateOBmpTables(
  537. ADest : TStream;
  538. var AFirstTable : TOBmpFirstTable;
  539. var ASecondTable : TOBmpSecondTable
  540. );
  541. procedure Generate3lvlOBmpTables(
  542. ADest : TStream;
  543. var AFirstTable : T3lvlOBmp1Table;
  544. var ASecondTable : T3lvlOBmp2Table;
  545. var AThirdTable : T3lvlOBmp3Table
  546. );
  547. function GetProp(
  548. const AHighS,
  549. ALowS : Word;
  550. const AProps : TPropRecArray;
  551. var AFirstTable : TOBmpFirstTable;
  552. var ASecondTable : TOBmpSecondTable
  553. ): PPropRec; inline;overload;
  554. function GetProp(
  555. const AHighS,
  556. ALowS : Word;
  557. const AProps : TPropRecArray;
  558. var AFirstTable : T3lvlOBmp1Table;
  559. var ASecondTable : T3lvlOBmp2Table;
  560. var AThirdTable : T3lvlOBmp3Table
  561. ): PPropRec; inline;overload;
  562. procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
  563. function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
  564. type
  565. TBitOrder = 0..7;
  566. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
  567. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
  568. function GenerateEndianIncludeFileName(
  569. const AStoreName : string;
  570. const AEndian : TEndianKind
  571. ): string;inline;
  572. procedure ReverseFromNativeEndian(
  573. const AData : PUCA_PropItemRec;
  574. const ADataLen : Cardinal;
  575. const ADest : PUCA_PropItemRec
  576. );
  577. procedure ReverseToNativeEndian(
  578. const AData : PUCA_PropItemRec;
  579. const ADataLen : Cardinal;
  580. const ADest : PUCA_PropItemRec
  581. );
  582. procedure CompareProps(
  583. const AProp1,
  584. AProp2 : PUCA_PropItemRec;
  585. const ADataLen : Integer
  586. );
  587. type
  588. TCollationName = string[128];
  589. TSerializedCollationHeader = packed record
  590. Base : TCollationName;
  591. Version : TCollationName;
  592. CollationName : TCollationName;
  593. VariableWeight : Byte;
  594. Backwards : Byte;
  595. BMP_Table1Length : DWord;
  596. BMP_Table2Length : DWord;
  597. OBMP_Table1Length : DWord;
  598. OBMP_Table2Length : DWord;
  599. PropCount : DWord;
  600. VariableLowLimit : Word;
  601. VariableHighLimit : Word;
  602. ChangedFields : Byte;
  603. end;
  604. PSerializedCollationHeader = ^TSerializedCollationHeader;
  605. procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
  606. procedure ReverseBytes(var AData; const ALength : Integer);
  607. procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
  608. resourcestring
  609. SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
  610. implementation
  611. uses
  612. typinfo, Math, AVL_Tree,
  613. trie;
  614. type
  615. TCardinalRec = packed record
  616. {$ifdef FPC_LITTLE_ENDIAN}
  617. byte0, byte1, byte2, byte3 : Byte;
  618. {$else FPC_LITTLE_ENDIAN}
  619. byte3, byte2, byte1, byte0 : Byte;
  620. {$endif FPC_LITTLE_ENDIAN}
  621. end;
  622. TWordRec = packed record
  623. {$ifdef FPC_LITTLE_ENDIAN}
  624. byte0, byte1 : Byte;
  625. {$else FPC_LITTLE_ENDIAN}
  626. byte1, byte0 : Byte;
  627. {$endif FPC_LITTLE_ENDIAN}
  628. end;
  629. { TUInt24Rec }
  630. class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
  631. begin
  632. TCardinalRec(Result).byte0 := a.byte0;
  633. TCardinalRec(Result).byte1 := a.byte1;
  634. TCardinalRec(Result).byte2 := a.byte2;
  635. TCardinalRec(Result).byte3 := 0;
  636. end;
  637. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  638. begin
  639. TCardinalRec(Result).byte0 := a.byte0;
  640. TCardinalRec(Result).byte1 := a.byte1;
  641. TCardinalRec(Result).byte2 := a.byte2;
  642. TCardinalRec(Result).byte3 := 0;
  643. end;
  644. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  645. begin
  646. Result := Cardinal(a);
  647. end;
  648. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  649. begin
  650. {$IFOPT R+}
  651. if (a.byte2 > 0) then
  652. Error(reIntOverflow);
  653. {$ENDIF R+}
  654. TWordRec(Result).byte0 := a.byte0;
  655. TWordRec(Result).byte1 := a.byte1;
  656. end;
  657. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  658. begin
  659. {$IFOPT R+}
  660. if (a.byte1 > 0) or (a.byte2 > 0) then
  661. Error(reIntOverflow);
  662. {$ENDIF R+}
  663. Result := a.byte0;
  664. end;
  665. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  666. begin
  667. {$IFOPT R+}
  668. if (a > $FFFFFF) then
  669. Error(reIntOverflow);
  670. {$ENDIF R+}
  671. Result.byte0 := TCardinalRec(a).byte0;
  672. Result.byte1 := TCardinalRec(a).byte1;
  673. Result.byte2 := TCardinalRec(a).byte2;
  674. end;
  675. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  676. begin
  677. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  678. end;
  679. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  680. begin
  681. Result := (TCardinalRec(b).byte3 = 0) and
  682. (a.byte0 = TCardinalRec(b).byte0) and
  683. (a.byte1 = TCardinalRec(b).byte1) and
  684. (a.byte2 = TCardinalRec(b).byte2);
  685. end;
  686. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  687. begin
  688. Result := (b = a);
  689. end;
  690. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  691. begin
  692. Result := (LongInt(a) = b);
  693. end;
  694. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  695. begin
  696. Result := (b = a);
  697. end;
  698. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  699. begin
  700. Result := (a.byte2 = 0) and
  701. (a.byte0 = TWordRec(b).byte0) and
  702. (a.byte1 = TWordRec(b).byte1);
  703. end;
  704. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  705. begin
  706. Result := (b = a);
  707. end;
  708. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  709. begin
  710. Result := (a.byte2 = 0) and
  711. (a.byte1 = 0) and
  712. (a.byte0 = b);
  713. end;
  714. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  715. begin
  716. Result := (b = a);
  717. end;
  718. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  719. begin
  720. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  721. end;
  722. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  723. begin
  724. Result := (TCardinalRec(b).byte3 <> 0) or
  725. (a.byte0 <> TCardinalRec(b).byte0) or
  726. (a.byte1 <> TCardinalRec(b).byte1) or
  727. (a.byte2 <> TCardinalRec(b).byte2);
  728. end;
  729. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  730. begin
  731. Result := (b <> a);
  732. end;
  733. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  734. begin
  735. Result := (a.byte2 > b.byte2) or
  736. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  737. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  738. end;
  739. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  740. begin
  741. Result := Cardinal(a) > b;
  742. end;
  743. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  744. begin
  745. Result := a > Cardinal(b);
  746. end;
  747. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  748. begin
  749. Result := (a.byte2 > b.byte2) or
  750. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  751. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  752. end;
  753. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  754. begin
  755. Result := Cardinal(a) >= b;
  756. end;
  757. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  758. begin
  759. Result := a >= Cardinal(b);
  760. end;
  761. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  762. begin
  763. Result := (b > a);
  764. end;
  765. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  766. begin
  767. Result := Cardinal(a) < b;
  768. end;
  769. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  770. begin
  771. Result := a < Cardinal(b);
  772. end;
  773. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  774. begin
  775. Result := (b >= a);
  776. end;
  777. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  778. begin
  779. Result := Cardinal(a) <= b;
  780. end;
  781. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  782. begin
  783. Result := a <= Cardinal(b);
  784. end;
  785. function GenerateEndianIncludeFileName(
  786. const AStoreName : string;
  787. const AEndian : TEndianKind
  788. ): string;inline;
  789. begin
  790. Result := ExtractFilePath(AStoreName) +
  791. ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
  792. end;
  793. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
  794. begin
  795. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  796. end;
  797. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
  798. begin
  799. if AValue then
  800. AData := AData or (1 shl (ABit mod 8))
  801. else
  802. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  803. end;
  804. var
  805. FS : TFormatSettings;
  806. function EvaluateFloat(const AStr : string) : Double;
  807. var
  808. s, n, d : string;
  809. i : Integer;
  810. begin
  811. Result := 0;
  812. s := Trim(AStr);
  813. if (Length(s) > 0) then begin
  814. i := Pos('/',s);
  815. if (i < 1) then
  816. Result := StrToFloat(s,FS)
  817. else begin
  818. n := Copy(s,1,i-1);
  819. d := Copy(s,i+1,MaxInt);
  820. Result := StrToInt(n) / StrToInt(d);
  821. end;
  822. end;
  823. end;
  824. function StrToCategory(const AStr : string) : TUnicodeCategory;
  825. var
  826. s : string;
  827. begin
  828. s := UpperCase(Trim(AStr));
  829. if (s = 'LU') then
  830. Result := ucUppercaseLetter
  831. else if (s = 'LL') then
  832. Result := ucLowercaseLetter
  833. else if (s = 'LT') then
  834. Result := ucTitlecaseLetter
  835. else if (s = 'LM') then
  836. Result := ucModifierLetter
  837. else if (s = 'LO') then
  838. Result := ucOtherLetter
  839. else
  840. if (s = 'MN') then
  841. Result := ucNonSpacingMark
  842. else if (s = 'MC') then
  843. Result := ucCombiningMark
  844. else if (s = 'ME') then
  845. Result := ucEnclosingMark
  846. else
  847. if (s = 'ND') then
  848. Result := ucDecimalNumber
  849. else if (s = 'NL') then
  850. Result := ucLetterNumber
  851. else if (s = 'NO') then
  852. Result := ucOtherNumber
  853. else
  854. if (s = 'PC') then
  855. Result := ucConnectPunctuation
  856. else if (s = 'PD') then
  857. Result := ucDashPunctuation
  858. else if (s = 'PS') then
  859. Result := ucOpenPunctuation
  860. else if (s = 'PE') then
  861. Result := ucClosePunctuation
  862. else if (s = 'PI') then
  863. Result := ucInitialPunctuation
  864. else if (s = 'PF') then
  865. Result := ucFinalPunctuation
  866. else if (s = 'PO') then
  867. Result := ucOtherPunctuation
  868. else
  869. if (s = 'SM') then
  870. Result := ucMathSymbol
  871. else if (s = 'SC') then
  872. Result := ucCurrencySymbol
  873. else if (s = 'SK') then
  874. Result := ucModifierSymbol
  875. else if (s = 'SO') then
  876. Result := ucOtherSymbol
  877. else
  878. if (s = 'ZS') then
  879. Result := ucSpaceSeparator
  880. else if (s = 'ZL') then
  881. Result := ucLineSeparator
  882. else if (s = 'ZP') then
  883. Result := ucParagraphSeparator
  884. else
  885. if (s = 'CC') then
  886. Result := ucControl
  887. else if (s = 'CF') then
  888. Result := ucFormat
  889. else if (s = 'CS') then
  890. Result := ucSurrogate
  891. else if (s = 'CO') then
  892. Result := ucPrivateUse
  893. else
  894. Result := ucUnassigned;
  895. end;
  896. function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
  897. var
  898. s : string;
  899. begin
  900. s := Trim(ACP);
  901. Result := 0;
  902. if (Length(s) > 0) and (s <> '#') then
  903. Result := StrToInt('$' + s);
  904. end;
  905. {function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
  906. begin
  907. case ACodePoint of
  908. $0009..$000D : Result := True;// White_Space # Cc [5] <control-0009>..<control-000D>
  909. $0020 : Result := True;// White_Space # Zs SPACE
  910. $0085 : Result := True;// White_Space # Cc <control-0085>
  911. $00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE
  912. $1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK
  913. $180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR
  914. $2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE
  915. $2028 : Result := True;// White_Space # Zl LINE SEPARATOR
  916. $2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR
  917. $202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE
  918. $205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE
  919. $3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE
  920. else
  921. Result := False;
  922. end;
  923. end;}
  924. function IsWhiteSpace(
  925. const ACodePoint : TUnicodeCodePoint;
  926. const AWhiteSpaces : TCodePointRecArray
  927. ) : Boolean;
  928. var
  929. i : Integer;
  930. p : ^TCodePointRec;
  931. begin
  932. p := @AWhiteSpaces[Low(AWhiteSpaces)];
  933. for i := Low(AWhiteSpaces) to High(AWhiteSpaces) do begin
  934. if (p^.LineType = 0) then begin
  935. if (p^.CodePoint = ACodePoint) then
  936. exit(True);
  937. end else begin
  938. if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then
  939. exit(True);
  940. end;
  941. Inc(p);
  942. end;
  943. Result := False;
  944. end;
  945. function NormalizeBlockName(const AName : string) : string;
  946. var
  947. i, c, k : Integer;
  948. s : string;
  949. begin
  950. c := Length(AName);
  951. SetLength(Result,c);
  952. s := LowerCase(AName);
  953. k := 0;
  954. for i := 1 to c do begin
  955. if (s[1] in ['a'..'z','0'..'9','-']) then begin
  956. k := k + 1;
  957. Result[k] := s[i];
  958. end;
  959. end;
  960. SetLength(Result,k);
  961. end;
  962. procedure ParseBlokcs(
  963. ADataAStream : TMemoryStream;
  964. var ABlocks : TBlocks
  965. );
  966. const
  967. LINE_LENGTH = 1024;
  968. DATA_LENGTH = 25000;
  969. var
  970. p : PAnsiChar;
  971. actualDataLen : Integer;
  972. bufferLength, bufferPos, lineLength, linePos : Integer;
  973. line : ansistring;
  974. function NextLine() : Boolean;
  975. var
  976. locOldPos : Integer;
  977. locOldPointer : PAnsiChar;
  978. begin
  979. Result := False;
  980. locOldPointer := p;
  981. locOldPos := bufferPos;
  982. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  983. Inc(p);
  984. Inc(bufferPos);
  985. end;
  986. if (locOldPos = bufferPos) and (p^ = #10) then begin
  987. lineLength := 0;
  988. Inc(p);
  989. Inc(bufferPos);
  990. linePos := 1;
  991. Result := True;
  992. end else if (locOldPos < bufferPos) then begin
  993. lineLength := (bufferPos - locOldPos);
  994. Move(locOldPointer^,line[1],lineLength);
  995. if (p^ = #10) then begin
  996. Dec(lineLength);
  997. Inc(p);
  998. Inc(bufferPos);
  999. end;
  1000. linePos := 1;
  1001. Result := True;
  1002. end;
  1003. end;
  1004. function NextToken() : ansistring;
  1005. var
  1006. k : Integer;
  1007. begin
  1008. k := linePos;
  1009. if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
  1010. Inc(linePos);
  1011. Result := Copy(line,k,(linePos-k));
  1012. exit;
  1013. end;
  1014. while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
  1015. Inc(linePos);
  1016. if (linePos > k) then begin
  1017. if (line[linePos] in [';','#','.']) then
  1018. Result := Copy(line,k,(linePos-k))
  1019. else
  1020. Result := Copy(line,k,(linePos-k+1));
  1021. Result := Trim(Result);
  1022. end else begin
  1023. Result := '';
  1024. end;
  1025. end;
  1026. procedure ParseLine();
  1027. var
  1028. locData : TBlockItemRec;
  1029. s : ansistring;
  1030. begin
  1031. s := NextToken();
  1032. if (s = '') or (s[1] = '#') then
  1033. exit;
  1034. locData.RangeStart := StrToInt('$'+s);
  1035. s := NextToken();
  1036. if (s <> '.') then
  1037. raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
  1038. s := NextToken();
  1039. if (s <> '.') then
  1040. raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
  1041. s := NextToken();
  1042. locData.RangeEnd := StrToInt('$'+s);
  1043. s := NextToken();
  1044. if (s <> ';') then
  1045. raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
  1046. locData.Name := Trim(NextToken());
  1047. locData.CanonicalName := NormalizeBlockName(locData.Name);
  1048. if (Length(ABlocks) <= actualDataLen) then
  1049. SetLength(ABlocks,Length(ABlocks)*2);
  1050. ABlocks[actualDataLen] := locData;
  1051. Inc(actualDataLen);
  1052. end;
  1053. procedure Prepare();
  1054. begin
  1055. SetLength(ABlocks,DATA_LENGTH);
  1056. actualDataLen := 0;
  1057. bufferLength := ADataAStream.Size;
  1058. bufferPos := 0;
  1059. p := ADataAStream.Memory;
  1060. lineLength := 0;
  1061. SetLength(line,LINE_LENGTH);
  1062. end;
  1063. begin
  1064. Prepare();
  1065. while NextLine() do
  1066. ParseLine();
  1067. SetLength(ABlocks,actualDataLen);
  1068. end;
  1069. procedure ParseProps(
  1070. ADataAStream : TMemoryStream;
  1071. var APropList : TPropListLineRecArray
  1072. );
  1073. const
  1074. LINE_LENGTH = 1024;
  1075. DATA_LENGTH = 25000;
  1076. var
  1077. p : PAnsiChar;
  1078. actualDataLen : Integer;
  1079. bufferLength, bufferPos, lineLength, linePos : Integer;
  1080. line : ansistring;
  1081. function NextLine() : Boolean;
  1082. var
  1083. locOldPos : Integer;
  1084. locOldPointer : PAnsiChar;
  1085. begin
  1086. Result := False;
  1087. locOldPointer := p;
  1088. locOldPos := bufferPos;
  1089. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  1090. Inc(p);
  1091. Inc(bufferPos);
  1092. end;
  1093. if (locOldPos = bufferPos) and (p^ = #10) then begin
  1094. lineLength := 0;
  1095. Inc(p);
  1096. Inc(bufferPos);
  1097. linePos := 1;
  1098. Result := True;
  1099. end else if (locOldPos < bufferPos) then begin
  1100. lineLength := (bufferPos - locOldPos);
  1101. Move(locOldPointer^,line[1],lineLength);
  1102. if (p^ = #10) then begin
  1103. Dec(lineLength);
  1104. Inc(p);
  1105. Inc(bufferPos);
  1106. end;
  1107. linePos := 1;
  1108. Result := True;
  1109. end;
  1110. end;
  1111. function NextToken() : ansistring;
  1112. var
  1113. k : Integer;
  1114. begin
  1115. k := linePos;
  1116. if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
  1117. Inc(linePos);
  1118. Result := Copy(line,k,(linePos-k));
  1119. exit;
  1120. end;
  1121. while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
  1122. Inc(linePos);
  1123. if (linePos > k) then begin
  1124. if (line[linePos] in [';','#','.']) then
  1125. Result := Copy(line,k,(linePos-k))
  1126. else
  1127. Result := Copy(line,k,(linePos-k+1));
  1128. Result := Trim(Result);
  1129. end else begin
  1130. Result := '';
  1131. end;
  1132. end;
  1133. procedure ParseLine();
  1134. var
  1135. locCP : Cardinal;
  1136. locData : TPropListLineRec;
  1137. s : ansistring;
  1138. begin
  1139. s := NextToken();
  1140. if (s = '') or (s[1] = '#') then
  1141. exit;
  1142. locCP := StrToInt('$'+s);
  1143. s := NextToken();
  1144. if (s = ';') then begin
  1145. locData.CodePoint.LineType := 0;
  1146. locData.CodePoint.CodePoint := locCP;
  1147. end else begin
  1148. if (s = '') or (s <> '.') or (NextToken() <> '.') then
  1149. raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
  1150. locData.CodePoint.LineType := 1;
  1151. locData.CodePoint.StartCodePoint := locCP;
  1152. locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
  1153. s := NextToken();
  1154. if (s <> ';') then
  1155. raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
  1156. end;
  1157. locData.PropName := Trim(NextToken());
  1158. if (Length(APropList) <= actualDataLen) then
  1159. SetLength(APropList,Length(APropList)*2);
  1160. APropList[actualDataLen] := locData;
  1161. Inc(actualDataLen);
  1162. end;
  1163. procedure Prepare();
  1164. begin
  1165. SetLength(APropList,DATA_LENGTH);
  1166. actualDataLen := 0;
  1167. bufferLength := ADataAStream.Size;
  1168. bufferPos := 0;
  1169. p := ADataAStream.Memory;
  1170. lineLength := 0;
  1171. SetLength(line,LINE_LENGTH);
  1172. end;
  1173. begin
  1174. Prepare();
  1175. while NextLine() do
  1176. ParseLine();
  1177. SetLength(APropList,actualDataLen);
  1178. end;
  1179. function FindCodePointsByProperty(
  1180. const APropName : string;
  1181. const APropList : TPropListLineRecArray
  1182. ) : TCodePointRecArray;
  1183. var
  1184. r : TCodePointRecArray;
  1185. i, k : Integer;
  1186. s : string;
  1187. begin
  1188. k := 0;
  1189. r := nil;
  1190. s := LowerCase(Trim(APropName));
  1191. for i := Low(APropList) to High(APropList) do begin
  1192. if (LowerCase(APropList[i].PropName) = s) then begin
  1193. if (k >= Length(r)) then begin
  1194. if (k = 0) then
  1195. SetLength(r,24)
  1196. else
  1197. SetLength(r,(2*Length(r)));
  1198. end;
  1199. r[k] := APropList[i].CodePoint;
  1200. Inc(k);
  1201. end;
  1202. end;
  1203. SetLength(r,k);
  1204. Result := r;
  1205. end;
  1206. procedure ParseHangulSyllableTypes(
  1207. ADataAStream : TMemoryStream;
  1208. var ACodePointList : TCodePointRecArray
  1209. );
  1210. const
  1211. LINE_LENGTH = 1024;
  1212. DATA_LENGTH = 25000;
  1213. var
  1214. p : PAnsiChar;
  1215. actualDataLen : Integer;
  1216. bufferLength, bufferPos, lineLength, linePos : Integer;
  1217. line : ansistring;
  1218. function NextLine() : Boolean;
  1219. var
  1220. locOldPos : Integer;
  1221. locOldPointer : PAnsiChar;
  1222. begin
  1223. Result := False;
  1224. locOldPointer := p;
  1225. locOldPos := bufferPos;
  1226. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  1227. Inc(p);
  1228. Inc(bufferPos);
  1229. end;
  1230. if (locOldPos = bufferPos) and (p^ = #10) then begin
  1231. lineLength := 0;
  1232. Inc(p);
  1233. Inc(bufferPos);
  1234. linePos := 1;
  1235. Result := True;
  1236. end else if (locOldPos < bufferPos) then begin
  1237. lineLength := (bufferPos - locOldPos);
  1238. Move(locOldPointer^,line[1],lineLength);
  1239. if (p^ = #10) then begin
  1240. Dec(lineLength);
  1241. Inc(p);
  1242. Inc(bufferPos);
  1243. end;
  1244. linePos := 1;
  1245. Result := True;
  1246. end;
  1247. end;
  1248. function NextToken() : ansistring;
  1249. var
  1250. k : Integer;
  1251. begin
  1252. k := linePos;
  1253. if (linePos < lineLength) and (line[linePos] = '.') then begin
  1254. Inc(linePos);
  1255. while (linePos < lineLength) and (line[linePos] = '.') do begin
  1256. Inc(linePos);
  1257. end;
  1258. Result := Copy(line,k,(linePos-k));
  1259. exit;
  1260. end;
  1261. while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
  1262. Inc(linePos);
  1263. if (linePos > k) then begin
  1264. if (line[linePos] in [';','#','.']) then
  1265. Result := Copy(line,k,(linePos-k))
  1266. else
  1267. Result := Copy(line,k,(linePos-k+1));
  1268. Result := Trim(Result);
  1269. end else begin
  1270. Result := '';
  1271. end;
  1272. //Inc(linePos);
  1273. end;
  1274. procedure ParseLine();
  1275. var
  1276. locData : TCodePointRec;
  1277. s : ansistring;
  1278. begin
  1279. s := NextToken();
  1280. if (s = '') or (s[1] = '#') then
  1281. exit;
  1282. locData.CodePoint := StrToInt('$'+s);
  1283. s := NextToken();
  1284. if (s = '') or (s[1] in [';','#']) then begin
  1285. locData.LineType := 0;
  1286. end else begin
  1287. if (s <> '..') then
  1288. raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
  1289. locData.StartCodePoint := locData.CodePoint;
  1290. locData.EndCodePoint := StrToInt('$'+NextToken());
  1291. locData.LineType := 1;
  1292. end;
  1293. if (Length(ACodePointList) <= actualDataLen) then
  1294. SetLength(ACodePointList,Length(ACodePointList)*2);
  1295. ACodePointList[actualDataLen] := locData;
  1296. Inc(actualDataLen);
  1297. end;
  1298. procedure Prepare();
  1299. begin
  1300. SetLength(ACodePointList,DATA_LENGTH);
  1301. actualDataLen := 0;
  1302. bufferLength := ADataAStream.Size;
  1303. bufferPos := 0;
  1304. p := ADataAStream.Memory;
  1305. lineLength := 0;
  1306. SetLength(line,LINE_LENGTH);
  1307. end;
  1308. begin
  1309. Prepare();
  1310. while NextLine() do
  1311. ParseLine();
  1312. SetLength(ACodePointList,actualDataLen);
  1313. end;
  1314. function IsHangulSyllable(
  1315. const ACodePoint : TUnicodeCodePoint;
  1316. const AHangulList : TCodePointRecArray
  1317. ) : Boolean;
  1318. var
  1319. i : Integer;
  1320. p : ^TCodePointRec;
  1321. begin
  1322. Result := False;
  1323. p := @AHangulList[Low(AHangulList)];
  1324. for i := Low(AHangulList) to High(AHangulList) do begin
  1325. if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
  1326. ( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
  1327. then begin
  1328. Result := True;
  1329. Break;
  1330. end;
  1331. Inc(p);
  1332. end;
  1333. end;
  1334. function IndexOf(
  1335. const AProp : TPropRec;
  1336. const APropList : TPropRecArray;
  1337. const AActualLen : Integer
  1338. ) : Integer;overload;
  1339. var
  1340. i : Integer;
  1341. p : PPropRec;
  1342. begin
  1343. Result := -1;
  1344. if (AActualLen > 0) then begin
  1345. p := @APropList[0];
  1346. for i := 0 to AActualLen - 1 do begin
  1347. if (AProp.Category = p^.Category) and
  1348. (AProp.CCC = p^.CCC) and
  1349. (AProp.NumericIndex = p^.NumericIndex) and
  1350. (AProp.SimpleUpperCase = p^.SimpleUpperCase) and
  1351. (AProp.SimpleLowerCase = p^.SimpleLowerCase) and
  1352. (AProp.WhiteSpace = p^.WhiteSpace) and
  1353. //
  1354. (AProp.DecompositionID = p^.DecompositionID) and
  1355. (* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
  1356. ( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
  1357. *)
  1358. (AProp.HangulSyllable = p^.HangulSyllable)
  1359. then begin
  1360. Result := i;
  1361. Break;
  1362. end;
  1363. Inc(p);
  1364. end;
  1365. end;
  1366. end;
  1367. function IndexOf(
  1368. const AItem : TUnicodeCodePointArray;
  1369. const AList : TDecompositionArray
  1370. ) : Integer;overload;
  1371. var
  1372. p : TUnicodeCodePointArray;
  1373. i : Integer;
  1374. begin
  1375. Result := -1;
  1376. if (Length(AList) = 0) then
  1377. exit;
  1378. for i := Low(AList) to High(AList) do begin
  1379. p := AList[i];
  1380. if (Length(p) = Length(AItem)) then begin
  1381. if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
  1382. exit(i);
  1383. end;
  1384. end;
  1385. Result := -1;
  1386. end;
  1387. function IndexOf(
  1388. const AItem : TNumericValue;
  1389. const AList : TNumericValueArray;
  1390. const AActualLen : Integer
  1391. ) : Integer;overload;
  1392. var
  1393. p : ^TNumericValue;
  1394. i : Integer;
  1395. begin
  1396. Result := -1;
  1397. if (AActualLen = 0) then
  1398. exit;
  1399. p := @AList[Low(AList)];
  1400. for i := Low(AList) to AActualLen - 1 do begin
  1401. if (AItem = p^) then
  1402. exit(i);
  1403. Inc(p);
  1404. end;
  1405. Result := -1;
  1406. end;
  1407. procedure Parse_UnicodeData(
  1408. ADataAStream : TMemoryStream;
  1409. var APropList : TPropRecArray;
  1410. var ANumericTable : TNumericValueArray;
  1411. var ADataLineList : TDataLineRecArray;
  1412. var ADecomposition : TDecompositionArray;
  1413. const AHangulList : TCodePointRecArray;
  1414. const AWhiteSpaces : TCodePointRecArray
  1415. );
  1416. const
  1417. LINE_LENGTH = 1024;
  1418. PROP_LENGTH = 5000;
  1419. DATA_LENGTH = 25000;
  1420. var
  1421. p : PAnsiChar;
  1422. bufferLength, bufferPos : Integer;
  1423. actualPropLen, actualDataLen, actualNumLen : Integer;
  1424. line : ansistring;
  1425. lineLength, linePos : Integer;
  1426. function NextLine() : Boolean;
  1427. var
  1428. locOldPos : Integer;
  1429. locOldPointer : PAnsiChar;
  1430. begin
  1431. Result := False;
  1432. locOldPointer := p;
  1433. locOldPos := bufferPos;
  1434. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  1435. Inc(p);
  1436. Inc(bufferPos);
  1437. end;
  1438. if (locOldPos < bufferPos) then begin
  1439. lineLength := (bufferPos - locOldPos);
  1440. Move(locOldPointer^,line[1],lineLength);
  1441. if (p^ = #10) then begin
  1442. Dec(lineLength);
  1443. Inc(p);
  1444. Inc(bufferPos);
  1445. end;
  1446. if (lineLength > 7) then begin
  1447. linePos := 1;
  1448. Result := True;
  1449. end;
  1450. end;
  1451. end;
  1452. function NextToken() : ansistring;
  1453. var
  1454. k : Integer;
  1455. begin
  1456. k := linePos;
  1457. while (linePos < lineLength) and not(line[linePos] in [';','#']) do
  1458. Inc(linePos);
  1459. if (linePos > k) then begin
  1460. if (line[linePos] in [';','#']) then
  1461. Result := Copy(line,k,(linePos-k))
  1462. else
  1463. Result := Copy(line,k,(linePos-k+1));
  1464. Result := Trim(Result);
  1465. end else begin
  1466. Result := '';
  1467. end;
  1468. Inc(linePos);
  1469. end;
  1470. function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
  1471. var
  1472. locStr, ks : ansistring;
  1473. k0,k : Integer;
  1474. begin
  1475. SetLength(Result,0);
  1476. locStr := UpperCase(Trim(AStr));
  1477. if (locStr = '') or (locStr[1] = '<') then
  1478. exit;
  1479. k0 := 1;
  1480. k := 1;
  1481. while (k <= Length(locStr)) do begin
  1482. while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
  1483. inc(k);
  1484. ks := Trim(Copy(locStr,k0,k-k0));
  1485. SetLength(Result,Length(Result)+1);
  1486. Result[Length(Result)-1] := StringToCodePoint(ks);
  1487. Inc(k);
  1488. k0 := k;
  1489. end;
  1490. end;
  1491. procedure ParseLine();
  1492. var
  1493. locCP : TUnicodeCodePoint;
  1494. locProp : TPropRec;
  1495. locData : TDataLineRec;
  1496. s : ansistring;
  1497. locRangeStart, locRangeEnd : Boolean;
  1498. k : Integer;
  1499. locDecompItem : TUnicodeCodePointArray;
  1500. numVal : TNumericValue;
  1501. begin
  1502. FillChar(locData,SizeOf(locData),#0);
  1503. FillChar(locProp,SizeOf(locProp),#0);
  1504. locCP := StrToInt('$'+NextToken());
  1505. s := NextToken();
  1506. locRangeStart := AnsiEndsText(', First>',s);
  1507. if locRangeStart then
  1508. locRangeEnd := False
  1509. else
  1510. locRangeEnd := AnsiEndsText(', Last>',s);
  1511. if locRangeStart then begin
  1512. locData.LineType := 1;
  1513. locData.StartCodePoint := locCP;
  1514. end else if locRangeEnd then begin
  1515. ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
  1516. exit;
  1517. //locData.EndCodePoint := locCP;
  1518. end else begin
  1519. locData.LineType := 0;
  1520. locData.CodePoint := locCP;
  1521. end;
  1522. locProp.Category := StrToCategory(NextToken());
  1523. locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
  1524. NextToken();//Bidi_Class
  1525. s := NextToken();//Decomposition_Type
  1526. locDecompItem := ParseCanonicalDecomposition(s);
  1527. if (Length(locDecompItem) = 0) then
  1528. locProp.DecompositionID := -1
  1529. else begin
  1530. locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
  1531. if (locProp.DecompositionID = -1) then begin
  1532. k := Length(ADecomposition);
  1533. locProp.DecompositionID := k;
  1534. SetLength(ADecomposition,k+1);
  1535. ADecomposition[k] := locDecompItem;
  1536. end;
  1537. end;
  1538. numVal := EvaluateFloat(NextToken());
  1539. if (numVal <> Double(0.0)) then begin
  1540. NextToken();
  1541. NextToken();
  1542. end else begin
  1543. s := NextToken();
  1544. if (s <> '') then
  1545. numVal := EvaluateFloat(s);
  1546. s := NextToken();
  1547. if (numVal = Double(0.0)) then
  1548. numVal := EvaluateFloat(s);
  1549. end;
  1550. k := IndexOf(numVal,ANumericTable,actualNumLen);
  1551. if (k = -1) then begin
  1552. if (actualNumLen >= Length(ANumericTable)) then
  1553. SetLength(ANumericTable,(actualNumLen*2));
  1554. ANumericTable[actualNumLen] := numVal;
  1555. k := actualNumLen;
  1556. Inc(actualNumLen);
  1557. end;
  1558. locProp.NumericIndex := k;
  1559. NextToken();//Bidi_Mirroed
  1560. NextToken();//Unicode_l_Name
  1561. NextToken();//ISO_Comment
  1562. locProp.SimpleUpperCase := StringToCodePoint(NextToken());
  1563. locProp.SimpleLowerCase := StringToCodePoint(NextToken());
  1564. NextToken();//Simple_Title_Case_Mapping
  1565. locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
  1566. locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
  1567. k := IndexOf(locProp,APropList,actualPropLen);
  1568. if (k = -1) then begin
  1569. k := actualPropLen;
  1570. locProp.PropID := k{ + 1};
  1571. APropList[k] := locProp;
  1572. Inc(actualPropLen);
  1573. end;
  1574. locData.PropID := k;
  1575. ADataLineList[actualDataLen] := locData;
  1576. Inc(actualDataLen);
  1577. end;
  1578. procedure Prepare();
  1579. var
  1580. r : TPropRec;
  1581. begin
  1582. SetLength(APropList,PROP_LENGTH);
  1583. actualPropLen := 0;
  1584. SetLength(ADataLineList,DATA_LENGTH);
  1585. actualDataLen := 0;
  1586. bufferLength := ADataAStream.Size;
  1587. bufferPos := 0;
  1588. p := ADataAStream.Memory;
  1589. lineLength := 0;
  1590. SetLength(line,LINE_LENGTH);
  1591. SetLength(ANumericTable,500);
  1592. actualNumLen := 0;
  1593. FillChar(r,SizeOf(r),#0);
  1594. r.PropID := 0;
  1595. r.Category := ucUnassigned;
  1596. r.DecompositionID := -1;
  1597. r.NumericIndex := 0;
  1598. APropList[0] := r;
  1599. Inc(actualPropLen);
  1600. ANumericTable[0] := 0;
  1601. Inc(actualNumLen);
  1602. end;
  1603. begin
  1604. Prepare();
  1605. while NextLine() do
  1606. ParseLine();
  1607. SetLength(APropList,actualPropLen);
  1608. SetLength(ADataLineList,actualDataLen);
  1609. SetLength(ANumericTable,actualNumLen);
  1610. end;
  1611. function GetPropID(
  1612. ACodePoint : TUnicodeCodePoint;
  1613. const ADataLineList : TDataLineRecArray
  1614. ) : Cardinal;
  1615. var
  1616. i : Integer;
  1617. p : PDataLineRec;
  1618. begin
  1619. Result := 0;
  1620. p := @ADataLineList[Low(ADataLineList)];
  1621. for i := Low(ADataLineList) to High(ADataLineList) do begin
  1622. if (p^.LineType = 0) then begin
  1623. if (p^.CodePoint = ACodePoint) then begin
  1624. Result := p^.PropID;
  1625. Break;
  1626. end;
  1627. end else begin
  1628. if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
  1629. Result := p^.PropID;
  1630. Break;
  1631. end;
  1632. end;
  1633. Inc(p);
  1634. end;
  1635. end;
  1636. procedure MakeDecomposition(
  1637. const ARawData : TDecompositionArray;
  1638. var ABook : TDecompositionBook
  1639. );
  1640. var
  1641. i, c, locPos : Integer;
  1642. locItem : TUnicodeCodePointArray;
  1643. begin
  1644. c := 0;
  1645. for i := Low(ARawData) to High(ARawData) do
  1646. c := c + Length(ARawData[i]);
  1647. SetLength(ABook.CodePoints,c);
  1648. SetLength(ABook.Index,Length(ARawData));
  1649. locPos := 0;
  1650. for i := Low(ARawData) to High(ARawData) do begin
  1651. locItem := ARawData[i];
  1652. ABook.Index[i].StartPosition := locPos;
  1653. ABook.Index[i].Length := Length(locItem);
  1654. Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
  1655. locPos := locPos + Length(locItem);
  1656. end;
  1657. end;
  1658. type
  1659. PBmpSecondTableItem = ^TBmpSecondTableItem;
  1660. function IndexOf(
  1661. const AItem : PBmpSecondTableItem;
  1662. const ATable : TBmpSecondTable;
  1663. const ATableActualLength : Integer
  1664. ) : Integer;overload;
  1665. var
  1666. i : Integer;
  1667. p : PBmpSecondTableItem;
  1668. begin
  1669. Result := -1;
  1670. if (ATableActualLength > 0) then begin
  1671. p := @ATable[0];
  1672. for i := 0 to ATableActualLength - 1 do begin
  1673. if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
  1674. Result := i;
  1675. Break;
  1676. end;
  1677. Inc(p);
  1678. end;
  1679. end;
  1680. end;
  1681. procedure MakeBmpTables(
  1682. var AFirstTable : TBmpFirstTable;
  1683. var ASecondTable : TBmpSecondTable;
  1684. const ADataLineList : TDataLineRecArray
  1685. );
  1686. var
  1687. locLowByte, locHighByte : Byte;
  1688. locTableItem : TBmpSecondTableItem;
  1689. locCP : TUnicodeCodePoint;
  1690. i, locSecondActualLen : Integer;
  1691. begin
  1692. SetLength(ASecondTable,120);
  1693. locSecondActualLen := 0;
  1694. for locHighByte := 0 to 255 do begin
  1695. FillChar(locTableItem,SizeOf(locTableItem),#0);
  1696. for locLowByte := 0 to 255 do begin
  1697. locCP := (locHighByte * 256) + locLowByte;
  1698. locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
  1699. end;
  1700. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  1701. if (i = -1) then begin
  1702. if (locSecondActualLen = Length(ASecondTable)) then
  1703. SetLength(ASecondTable,locSecondActualLen + 50);
  1704. i := locSecondActualLen;
  1705. ASecondTable[i] := locTableItem;
  1706. Inc(locSecondActualLen);
  1707. end;
  1708. AFirstTable[locHighByte] := i;
  1709. end;
  1710. SetLength(ASecondTable,locSecondActualLen);
  1711. end;
  1712. type
  1713. P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
  1714. function IndexOf(
  1715. const AItem : P3lvlBmp3TableItem;
  1716. const ATable : T3lvlBmp3Table;
  1717. const ATableActualLength : Integer
  1718. ) : Integer;overload;
  1719. var
  1720. i : Integer;
  1721. p : P3lvlBmp3TableItem;
  1722. begin
  1723. Result := -1;
  1724. if (ATableActualLength > 0) then begin
  1725. p := @ATable[0];
  1726. for i := 0 to ATableActualLength - 1 do begin
  1727. if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
  1728. Result := i;
  1729. Break;
  1730. end;
  1731. Inc(p);
  1732. end;
  1733. end;
  1734. end;
  1735. type
  1736. P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
  1737. function IndexOf(
  1738. const AItem : P3lvlBmp2TableItem;
  1739. const ATable : T3lvlBmp2Table
  1740. ) : Integer;overload;
  1741. var
  1742. i : Integer;
  1743. p : P3lvlBmp2TableItem;
  1744. begin
  1745. Result := -1;
  1746. if (Length(ATable) > 0) then begin
  1747. p := @ATable[0];
  1748. for i := 0 to Length(ATable) - 1 do begin
  1749. if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
  1750. Result := i;
  1751. Break;
  1752. end;
  1753. Inc(p);
  1754. end;
  1755. end;
  1756. end;
  1757. procedure MakeBmpTables3Levels(
  1758. var AFirstTable : T3lvlBmp1Table;
  1759. var ASecondTable : T3lvlBmp2Table;
  1760. var AThirdTable : T3lvlBmp3Table;
  1761. const ADataLineList : TDataLineRecArray
  1762. );
  1763. var
  1764. locLowByte0, locLowByte1, locHighByte : Byte;
  1765. locTableItem2 : T3lvlBmp2TableItem;
  1766. locTableItem3 : T3lvlBmp3TableItem;
  1767. locCP : TUnicodeCodePoint;
  1768. i, locThirdActualLen : Integer;
  1769. begin
  1770. SetLength(AThirdTable,120);
  1771. locThirdActualLen := 0;
  1772. for locHighByte := 0 to 255 do begin
  1773. FillChar(locTableItem2,SizeOf(locTableItem2),#0);
  1774. for locLowByte0 := 0 to 15 do begin
  1775. FillChar(locTableItem3,SizeOf(locTableItem3),#0);
  1776. for locLowByte1 := 0 to 15 do begin
  1777. locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
  1778. locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
  1779. end;
  1780. i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
  1781. if (i = -1) then begin
  1782. if (locThirdActualLen = Length(AThirdTable)) then
  1783. SetLength(AThirdTable,locThirdActualLen + 50);
  1784. i := locThirdActualLen;
  1785. AThirdTable[i] := locTableItem3;
  1786. Inc(locThirdActualLen);
  1787. end;
  1788. locTableItem2[locLowByte0] := i;
  1789. end;
  1790. i := IndexOf(@locTableItem2,ASecondTable);
  1791. if (i = -1) then begin
  1792. i := Length(ASecondTable);
  1793. SetLength(ASecondTable,(i + 1));
  1794. ASecondTable[i] := locTableItem2;
  1795. end;
  1796. AFirstTable[locHighByte] := i;
  1797. end;
  1798. SetLength(AThirdTable,locThirdActualLen);
  1799. end;
  1800. procedure GenerateLicenceText(ADest : TStream);
  1801. var
  1802. s : ansistring;
  1803. begin
  1804. s := SLicenseText + sLineBreak + sLineBreak;
  1805. ADest.Write(s[1],Length(s));
  1806. end;
  1807. procedure GenerateBmpTables(
  1808. ADest : TStream;
  1809. var AFirstTable : TBmpFirstTable;
  1810. var ASecondTable : TBmpSecondTable
  1811. );
  1812. procedure AddLine(const ALine : ansistring);
  1813. var
  1814. buffer : ansistring;
  1815. begin
  1816. buffer := ALine + sLineBreak;
  1817. ADest.Write(buffer[1],Length(buffer));
  1818. end;
  1819. var
  1820. i, j, c : Integer;
  1821. locLine : string;
  1822. begin
  1823. AddLine('const');
  1824. AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
  1825. locLine := '';
  1826. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  1827. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  1828. if (((i+1) mod 16) = 0) then begin
  1829. locLine := ' ' + locLine;
  1830. AddLine(locLine);
  1831. locLine := '';
  1832. end;
  1833. end;
  1834. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  1835. locLine := ' ' + locLine;
  1836. AddLine(locLine);
  1837. AddLine(' );' + sLineBreak);
  1838. AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
  1839. c := High(ASecondTable);
  1840. for i := Low(ASecondTable) to c do begin
  1841. locLine := '';
  1842. for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
  1843. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  1844. if (((j+1) mod 16) = 0) then begin
  1845. if (i = c) and (j = 255) then
  1846. Delete(locLine,Length(locLine),1);
  1847. locLine := ' ' + locLine;
  1848. AddLine(locLine);
  1849. locLine := '';
  1850. end;
  1851. end;
  1852. end;
  1853. AddLine(' );' + sLineBreak);
  1854. end;
  1855. //----------------------------------
  1856. procedure Generate3lvlBmpTables(
  1857. ADest : TStream;
  1858. var AFirstTable : T3lvlBmp1Table;
  1859. var ASecondTable : T3lvlBmp2Table;
  1860. var AThirdTable : T3lvlBmp3Table
  1861. );
  1862. procedure AddLine(const ALine : ansistring);
  1863. var
  1864. buffer : ansistring;
  1865. begin
  1866. buffer := ALine + sLineBreak;
  1867. ADest.Write(buffer[1],Length(buffer));
  1868. end;
  1869. var
  1870. i, j, c : Integer;
  1871. locLine : string;
  1872. begin
  1873. AddLine('const');
  1874. AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
  1875. locLine := '';
  1876. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  1877. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  1878. if (((i+1) mod 16) = 0) then begin
  1879. locLine := ' ' + locLine;
  1880. AddLine(locLine);
  1881. locLine := '';
  1882. end;
  1883. end;
  1884. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  1885. locLine := ' ' + locLine;
  1886. AddLine(locLine);
  1887. AddLine(' );' + sLineBreak);
  1888. AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
  1889. c := High(ASecondTable);
  1890. for i := Low(ASecondTable) to c do begin
  1891. locLine := '(';
  1892. for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
  1893. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  1894. Delete(locLine,Length(locLine),1);
  1895. locLine := ' ' + locLine + ')';
  1896. if (i < c) then
  1897. locLine := locLine + ',';
  1898. AddLine(locLine);
  1899. end;
  1900. AddLine(' );' + sLineBreak);
  1901. AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
  1902. c := High(AThirdTable);
  1903. for i := Low(AThirdTable) to c do begin
  1904. locLine := '(';
  1905. for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
  1906. locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
  1907. Delete(locLine,Length(locLine),1);
  1908. locLine := ' ' + locLine + ')';
  1909. if (i < c) then
  1910. locLine := locLine + ',';
  1911. AddLine(locLine);
  1912. end;
  1913. AddLine(' );' + sLineBreak);
  1914. end;
  1915. function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
  1916. begin
  1917. if (AEndian = ekBig) then
  1918. Result := Format(
  1919. '(byte2 : $%s; byte1 : $%s; byte0 : $%s;)',
  1920. [ IntToHex(AValue.byte2,2), IntToHex(AValue.byte1,2),
  1921. IntToHex(AValue.byte0,2)
  1922. ]
  1923. )
  1924. else
  1925. Result := Format(
  1926. '(byte0 : $%s; byte1 : $%s; byte2 : $%s;)',
  1927. [ IntToHex(AValue.byte0,2), IntToHex(AValue.byte1,2),
  1928. IntToHex(AValue.byte2,2)
  1929. ]
  1930. );
  1931. end;
  1932. procedure GeneratePropTable(
  1933. ADest : TStream;
  1934. const APropList : TPropRecArray;
  1935. const AEndian : TEndianKind
  1936. );
  1937. procedure AddLine(const ALine : ansistring);
  1938. var
  1939. buffer : ansistring;
  1940. begin
  1941. buffer := ALine + sLineBreak;
  1942. ADest.Write(buffer[1],Length(buffer));
  1943. end;
  1944. var
  1945. i : Integer;
  1946. locLine : string;
  1947. p : PPropRec;
  1948. begin
  1949. AddLine('');
  1950. AddLine('const');
  1951. AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
  1952. AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
  1953. p := @APropList[0];
  1954. for i := Low(APropList) to High(APropList) - 1 do begin
  1955. locLine := ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
  1956. ' CCC : ' + IntToStr(p^.CCC) + ';' +
  1957. ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
  1958. ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
  1959. ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
  1960. ' DecompositionID : ' + IntToStr(p^.DecompositionID) + '),';
  1961. AddLine(locLine);
  1962. Inc(p);
  1963. end;
  1964. locLine := //' (Category : TUnicodeCategory.' + GetEnumName(pti,Ord(p^.Category)) + ';' +
  1965. ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
  1966. ' CCC : ' + IntToStr(p^.CCC) + ';' +
  1967. ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
  1968. ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
  1969. ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
  1970. ' DecompositionID : ' + IntToStr(p^.DecompositionID) + ')';
  1971. AddLine(locLine);
  1972. AddLine(' );' + sLineBreak);
  1973. end;
  1974. procedure GenerateNumericTable(
  1975. ADest : TStream;
  1976. const ANumList : TNumericValueArray;
  1977. const ACompleteUnit : Boolean
  1978. );
  1979. procedure AddLine(const ALine : ansistring);
  1980. var
  1981. buffer : ansistring;
  1982. begin
  1983. buffer := ALine + sLineBreak;
  1984. ADest.Write(buffer[1],Length(buffer));
  1985. end;
  1986. var
  1987. i : Integer;
  1988. locLine : string;
  1989. p : ^TNumericValue;
  1990. begin
  1991. if ACompleteUnit then begin
  1992. GenerateLicenceText(ADest);
  1993. AddLine('unit unicodenumtable;');
  1994. AddLine('interface');
  1995. AddLine('');
  1996. end;
  1997. AddLine('');
  1998. AddLine('const');
  1999. AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
  2000. AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
  2001. locLine := '';
  2002. p := @ANumList[0];
  2003. for i := Low(ANumList) to High(ANumList) - 1 do begin
  2004. locLine := locLine + FloatToStr(p^,FS) + ' ,';
  2005. if (i > 0) and ((i mod 8) = 0) then begin
  2006. AddLine(' ' + locLine);
  2007. locLine := '';
  2008. end;
  2009. Inc(p);
  2010. end;
  2011. locLine := locLine + FloatToStr(p^,FS);
  2012. AddLine(' ' + locLine);
  2013. AddLine(' );' + sLineBreak);
  2014. if ACompleteUnit then begin
  2015. AddLine('');
  2016. AddLine('implementation');
  2017. AddLine('');
  2018. AddLine('end.');
  2019. end;
  2020. end;
  2021. procedure GenerateDecompositionBookTable(
  2022. ADest : TStream;
  2023. const ABook : TDecompositionBook;
  2024. const AEndian : TEndianKind
  2025. );
  2026. procedure AddLine(const ALine : ansistring);
  2027. var
  2028. buffer : ansistring;
  2029. begin
  2030. buffer := ALine + sLineBreak;
  2031. ADest.Write(buffer[1],Length(buffer));
  2032. end;
  2033. var
  2034. i, k : Integer;
  2035. p : ^TDecompositionIndexRec;
  2036. cp : ^TUnicodeCodePoint;
  2037. cp24 : UInt24;
  2038. locLine : string;
  2039. begin
  2040. AddLine('const');
  2041. AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
  2042. AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
  2043. AddLine('type');
  2044. AddLine(' TDecompositionIndexRec = packed record');
  2045. AddLine(' StartPosition : Word;');
  2046. AddLine(' Length : Byte;');
  2047. AddLine(' end;');
  2048. AddLine(' TDecompositionBookRec = packed record');
  2049. AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
  2050. AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
  2051. AddLine(' end;');
  2052. AddLine('const');
  2053. AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
  2054. p := @ABook.Index[0];
  2055. AddLine(' Index : (// Index BEGIN');
  2056. k := 0;
  2057. locLine := ' ';
  2058. for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
  2059. locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
  2060. ' Length : ' + IntToStr(p^.Length) + '), ';
  2061. k := k + 1;
  2062. if (k >= 2) then begin
  2063. AddLine(locLine);
  2064. locLine := ' ';
  2065. k := 0;
  2066. end;
  2067. Inc(p);
  2068. end;
  2069. locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
  2070. ' Length : ' + IntToStr(p^.Length) + ')';
  2071. AddLine(locLine);
  2072. AddLine(' ); // Index END');
  2073. cp := @ABook.CodePoints[0];
  2074. AddLine(' CodePoints : (// CodePoints BEGIN');
  2075. k := 0;
  2076. locLine := ' ';
  2077. for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
  2078. cp24 := cp^;
  2079. locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
  2080. Inc(k);
  2081. if (k >= 16) then begin
  2082. AddLine(locLine);
  2083. k := 0;
  2084. locLine := ' ';
  2085. end;
  2086. Inc(cp);
  2087. end;
  2088. cp24 := cp^;
  2089. locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
  2090. AddLine(locLine);
  2091. AddLine(' ); // CodePoints END');
  2092. AddLine(' );' + sLineBreak);
  2093. end;
  2094. procedure GenerateOutBmpTable(
  2095. ADest : TStream;
  2096. const AList : TDataLineRecArray
  2097. );
  2098. procedure AddLine(const ALine : ansistring);
  2099. var
  2100. buffer : ansistring;
  2101. begin
  2102. buffer := ALine + sLineBreak;
  2103. ADest.Write(buffer[1],Length(buffer));
  2104. end;
  2105. var
  2106. i, j : Integer;
  2107. locLine : string;
  2108. p : PDataLineRec;
  2109. begin
  2110. AddLine('');
  2111. //AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
  2112. //AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
  2113. j := -1;
  2114. p := @AList[0];
  2115. for i := 0 to Length(AList) - 1 do begin
  2116. if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
  2117. (p^.StartCodePoint > $FFFF)
  2118. then begin
  2119. j := i;
  2120. Break;
  2121. end;
  2122. Inc(p);
  2123. end;
  2124. if (j < 0) then
  2125. exit;
  2126. for i := j to Length(AList) - 2 do begin
  2127. locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
  2128. ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
  2129. ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ;
  2130. AddLine(locLine);
  2131. Inc(p);
  2132. end;
  2133. locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
  2134. ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
  2135. ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ;
  2136. AddLine(locLine);
  2137. AddLine(' );' + sLineBreak);
  2138. end;
  2139. function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
  2140. var
  2141. k, i, locResLen : Integer;
  2142. q, p, pr : PDataLineRec;
  2143. k_end : TUnicodeCodePoint;
  2144. begin
  2145. locResLen := 1;
  2146. SetLength(Result,Length(AData));
  2147. FillChar(Result[0],Length(Result),#0);
  2148. Result[0] := AData[0];
  2149. q := @AData[0];
  2150. k := 0;
  2151. while (k < Length(AData)) do begin
  2152. if (q^.LineType = 0) then
  2153. k_end := q^.CodePoint
  2154. else
  2155. k_end := q^.EndCodePoint;
  2156. if ((k+1) = Length(AData)) then begin
  2157. i := k;
  2158. end else begin
  2159. p := @AData[k+1];
  2160. i := k +1;
  2161. while (i < (Length(AData) {- 1})) do begin
  2162. if (p^.PropID <> q^.PropID) then begin
  2163. i := i - 1;
  2164. Break;
  2165. end;
  2166. if (p^.LineType = 0) then begin
  2167. if (p^.CodePoint <> (k_end + 1)) then begin
  2168. i := i - 1;
  2169. Break;
  2170. end;
  2171. Inc(k_end);
  2172. end else begin
  2173. if (p^.StartCodePoint <> (k_end + 1)) then begin
  2174. i := i - 1;
  2175. Break;
  2176. end;
  2177. k_end := p^.EndCodePoint;
  2178. end;
  2179. Inc(i);
  2180. Inc(p);
  2181. end;
  2182. end;
  2183. {if (i = k) then begin
  2184. Result[locResLen] := q^;
  2185. Inc(locResLen);
  2186. end else begin }
  2187. p := @AData[i];
  2188. pr := @Result[locResLen];
  2189. pr^.PropID := q^.PropID;
  2190. if (q^.LineType = 0) then
  2191. pr^.StartCodePoint := q^.CodePoint
  2192. else
  2193. pr^.StartCodePoint := q^.StartCodePoint;
  2194. pr^.LineType := 1;
  2195. if (p^.LineType = 0) then
  2196. pr^.EndCodePoint := p^.CodePoint
  2197. else
  2198. pr^.EndCodePoint := p^.EndCodePoint;
  2199. Inc(locResLen);
  2200. //end;
  2201. k := i + 1;
  2202. if (k = Length(AData)) then
  2203. Break;
  2204. q := @AData[k];
  2205. end;
  2206. SetLength(Result,locResLen);
  2207. end;
  2208. procedure ParseUCAFile(
  2209. ADataAStream : TMemoryStream;
  2210. var ABook : TUCA_DataBook
  2211. );
  2212. const
  2213. LINE_LENGTH = 1024;
  2214. DATA_LENGTH = 25000;
  2215. var
  2216. p : PAnsiChar;
  2217. actualDataLen : Integer;
  2218. bufferLength, bufferPos, lineLength, linePos : Integer;
  2219. line : ansistring;
  2220. function NextLine() : Boolean;
  2221. var
  2222. locOldPos : Integer;
  2223. locOldPointer : PAnsiChar;
  2224. begin
  2225. Result := False;
  2226. locOldPointer := p;
  2227. locOldPos := bufferPos;
  2228. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  2229. Inc(p);
  2230. Inc(bufferPos);
  2231. end;
  2232. if (locOldPos = bufferPos) and (p^ = #10) then begin
  2233. lineLength := 0;
  2234. Inc(p);
  2235. Inc(bufferPos);
  2236. linePos := 1;
  2237. Result := True;
  2238. end else if (locOldPos < bufferPos) then begin
  2239. lineLength := (bufferPos - locOldPos) + 1;
  2240. Move(locOldPointer^,line[1],lineLength);
  2241. if (p^ = #10) then begin
  2242. Dec(lineLength);
  2243. Inc(p);
  2244. Inc(bufferPos);
  2245. end;
  2246. linePos := 1;
  2247. Result := True;
  2248. end;
  2249. end;
  2250. procedure SkipSpace();
  2251. begin
  2252. while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
  2253. Inc(linePos);
  2254. end;
  2255. function NextToken() : ansistring;
  2256. const C_SEPARATORS = [';','#','.','[',']','*','@'];
  2257. var
  2258. k : Integer;
  2259. begin
  2260. SkipSpace();
  2261. k := linePos;
  2262. if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
  2263. Result := line[linePos];
  2264. Inc(linePos);
  2265. exit;
  2266. end;
  2267. while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
  2268. Inc(linePos);
  2269. if (linePos > k) then begin
  2270. if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
  2271. Result := Copy(line,k,(linePos-k))
  2272. else
  2273. Result := Copy(line,k,(linePos-k+1));
  2274. Result := Trim(Result);
  2275. end else begin
  2276. Result := '';
  2277. end;
  2278. end;
  2279. procedure CheckToken(const AToken : string);
  2280. var
  2281. a, b : string;
  2282. begin
  2283. a := LowerCase(Trim(AToken));
  2284. b := LowerCase(Trim(NextToken()));
  2285. if (a <> b) then
  2286. raise Exception.CreateFmt('Expected token "%s" but found "%s".',[a,b]);
  2287. end;
  2288. function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
  2289. var
  2290. s :AnsiString;
  2291. k : Integer;
  2292. begin
  2293. Result := False;
  2294. s := NextToken();
  2295. if (s <> '[') then
  2296. exit;
  2297. s := NextToken();
  2298. if (s = '.') then
  2299. ADest.Variable := False
  2300. else begin
  2301. if (s <> '*') then
  2302. raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
  2303. ADest.Variable := True;
  2304. end;
  2305. ADest.Weights[0] := StrToInt('$'+NextToken());
  2306. for k := 1 to 3 do begin
  2307. CheckToken('.');
  2308. ADest.Weights[k] := StrToInt('$'+NextToken());
  2309. end;
  2310. CheckToken(']');
  2311. Result := True;
  2312. end;
  2313. procedure ParseHeaderVar();
  2314. var
  2315. s,ss : string;
  2316. k : Integer;
  2317. begin
  2318. s := NextToken();
  2319. if (s = 'version') then begin
  2320. ss := '';
  2321. while True do begin
  2322. s := NextToken();
  2323. if (s = '') then
  2324. Break;
  2325. ss := ss + s;
  2326. end;
  2327. ABook.Version := ss;
  2328. end else if (s = 'variable') then begin
  2329. if (s = 'blanked') then
  2330. ABook.VariableWeight := ucaBlanked
  2331. else if (s = 'non-ignorable') then
  2332. ABook.VariableWeight := ucaNonIgnorable
  2333. else if (s = 'shifted') then
  2334. ABook.VariableWeight := ucaShifted
  2335. else if (s = 'shift-trimmed') then
  2336. ABook.VariableWeight := ucaShiftedTrimmed
  2337. else if (s = 'ignoresp') then
  2338. ABook.VariableWeight := ucaIgnoreSP
  2339. else
  2340. raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
  2341. end else if (s = 'backwards') or (s = 'forwards') then begin
  2342. ss := s;
  2343. s := NextToken();
  2344. k := StrToInt(s);
  2345. if (k < 1) or (k > 4) then
  2346. raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
  2347. ABook.Backwards[k] := (s = 'backwards');
  2348. end;
  2349. end;
  2350. procedure ParseLine();
  2351. var
  2352. locData : ^TUCA_LineRec;
  2353. s : ansistring;
  2354. kc : Integer;
  2355. begin
  2356. if (Length(ABook.Lines) <= actualDataLen) then
  2357. SetLength(ABook.Lines,Length(ABook.Lines)*2);
  2358. locData := @ABook.Lines[actualDataLen];
  2359. s := NextToken();
  2360. if (s = '') or (s[1] = '#') then
  2361. exit;
  2362. if (s[1] = '@') then begin
  2363. ParseHeaderVar();
  2364. exit;
  2365. end;
  2366. SetLength(locData^.CodePoints,10);
  2367. locData^.CodePoints[0] := StrToInt('$'+s);
  2368. kc := 1;
  2369. while True do begin
  2370. s := Trim(NextToken());
  2371. if (s = '') then
  2372. exit;
  2373. if (s = ';') then
  2374. Break;
  2375. locData^.CodePoints[kc] := StrToInt('$'+s);
  2376. Inc(kc);
  2377. end;
  2378. if (kc = 0) then
  2379. exit;
  2380. SetLength(locData^.CodePoints,kc);
  2381. SetLength(locData^.Weights,24);
  2382. kc := 0;
  2383. while ReadWeightBlock(locData^.Weights[kc]) do begin
  2384. Inc(kc);
  2385. end;
  2386. SetLength(locData^.Weights,kc);
  2387. Inc(actualDataLen);
  2388. end;
  2389. procedure Prepare();
  2390. var
  2391. k : Integer;
  2392. begin
  2393. ABook.VariableWeight := ucaShifted;
  2394. for k := Low(ABook.Backwards) to High(ABook.Backwards) do
  2395. ABook.Backwards[k] := False;
  2396. SetLength(ABook.Lines,DATA_LENGTH);
  2397. actualDataLen := 0;
  2398. bufferLength := ADataAStream.Size;
  2399. bufferPos := 0;
  2400. p := ADataAStream.Memory;
  2401. lineLength := 0;
  2402. SetLength(line,LINE_LENGTH);
  2403. end;
  2404. begin
  2405. Prepare();
  2406. while NextLine() do
  2407. ParseLine();
  2408. SetLength(ABook.Lines,actualDataLen);
  2409. end;
  2410. procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
  2411. var
  2412. i : Integer;
  2413. begin
  2414. Write(ATitle, ' ');
  2415. for i := 0 to Length(X) - 1 do
  2416. Write(X[i],' ');
  2417. WriteLn();
  2418. end;
  2419. function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
  2420. var
  2421. i, hb : Integer;
  2422. begin
  2423. if (A=B) then
  2424. exit(0);
  2425. Result := 1;
  2426. hb := Length(B^.CodePoints) - 1;
  2427. for i := 0 to Length(A^.CodePoints) - 1 do begin
  2428. if (i > hb) then
  2429. exit;
  2430. if (A^.CodePoints[i] < B^.CodePoints[i]) then
  2431. exit(-1);
  2432. if (A^.CodePoints[i] > B^.CodePoints[i]) then
  2433. exit(1);
  2434. end;
  2435. if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
  2436. exit(0);
  2437. exit(-1);
  2438. end;
  2439. Procedure QuickSort(var AList: TUCA_DataBookIndex; L, R : Longint;
  2440. ABook : PUCA_DataBook);
  2441. var
  2442. I, J : Longint;
  2443. P, Q : Integer;
  2444. begin
  2445. repeat
  2446. I := L;
  2447. J := R;
  2448. P := AList[ (L + R) div 2 ];
  2449. repeat
  2450. while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
  2451. I := I + 1;
  2452. while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
  2453. J := J - 1;
  2454. If I <= J then
  2455. begin
  2456. Q := AList[I];
  2457. AList[I] := AList[J];
  2458. AList[J] := Q;
  2459. I := I + 1;
  2460. J := J - 1;
  2461. end;
  2462. until I > J;
  2463. // sort the smaller range recursively
  2464. // sort the bigger range via the loop
  2465. // Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
  2466. if J - L < R - I then
  2467. begin
  2468. if L < J then
  2469. QuickSort(AList, L, J, ABook);
  2470. L := I;
  2471. end
  2472. else
  2473. begin
  2474. if I < R then
  2475. QuickSort(AList, I, R, ABook);
  2476. R := J;
  2477. end;
  2478. until L >= R;
  2479. end;
  2480. function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
  2481. var
  2482. r : TUCA_DataBookIndex;
  2483. i, c : Integer;
  2484. begin
  2485. c := Length(ABook^.Lines);
  2486. SetLength(r,c);
  2487. for i := 0 to c - 1 do
  2488. r[i] := i;
  2489. QuickSort(r,0,c-1,ABook);
  2490. Result := r;
  2491. end;
  2492. function ConstructContextTree(
  2493. const AContext : PUCA_LineContextRec;
  2494. var ADestBuffer;
  2495. const ADestBufferLength : Cardinal
  2496. ) : PUCA_PropItemContextTreeRec;forward;
  2497. function ConstructItem(
  2498. AItem : PUCA_PropItemRec;
  2499. ACodePoint : Cardinal;
  2500. AValid : Byte;
  2501. AChildCount : Byte;
  2502. const AWeights : array of TUCA_WeightRec;
  2503. const AStoreCP : Boolean;
  2504. const AContext : PUCA_LineContextRec;
  2505. const ADeleted : Boolean
  2506. ) : Cardinal;
  2507. var
  2508. i : Integer;
  2509. p : PUCA_PropItemRec;
  2510. pw : PUCA_PropWeights;
  2511. pb : PByte;
  2512. hasContext : Boolean;
  2513. contextTree : PUCA_PropItemContextTreeRec;
  2514. wl : Integer;
  2515. begin
  2516. p := AItem;
  2517. p^.Size := 0;
  2518. p^.Flags := 0;
  2519. p^.WeightLength := 0;
  2520. SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
  2521. p^.ChildCount := AChildCount;
  2522. hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
  2523. if hasContext then
  2524. wl := 0
  2525. else
  2526. wl := Length(AWeights);
  2527. p^.WeightLength := wl;
  2528. if (wl = 0) then begin
  2529. Result := SizeOf(TUCA_PropItemRec);
  2530. if ADeleted then
  2531. SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
  2532. end else begin
  2533. Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
  2534. pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
  2535. Unaligned(PWord(pb)^) := AWeights[0].Weights[0];
  2536. pb := pb + 2;
  2537. if (AWeights[0].Weights[1] > High(Byte)) then begin
  2538. Unaligned(PWord(pb)^) := AWeights[0].Weights[1];
  2539. pb := pb + 2;
  2540. end else begin
  2541. SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
  2542. pb^ := AWeights[0].Weights[1];
  2543. pb := pb + 1;
  2544. Result := Result - 1;
  2545. end;
  2546. if (AWeights[0].Weights[2] > High(Byte)) then begin
  2547. Unaligned(PWord(pb)^) := AWeights[0].Weights[2];
  2548. pb := pb + 2;
  2549. end else begin
  2550. SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
  2551. pb^ := AWeights[0].Weights[2];
  2552. pb := pb + 1;
  2553. Result := Result - 1;
  2554. end;
  2555. pw := PUCA_PropWeights(pb);
  2556. for i := 1 to wl - 1 do begin
  2557. pw^.Weights[0] := AWeights[i].Weights[0];
  2558. pw^.Weights[1] := AWeights[i].Weights[1];
  2559. pw^.Weights[2] := AWeights[i].Weights[2];
  2560. //pw^.Variable := BoolToByte(AWeights[i].Variable);
  2561. Inc(pw);
  2562. end;
  2563. end;
  2564. hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
  2565. if AStoreCP or hasContext then begin
  2566. Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint;
  2567. Result := Result + SizeOf(UInt24);
  2568. SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
  2569. end;
  2570. if hasContext then begin
  2571. contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt);
  2572. Result := Result + Cardinal(contextTree^.Size);
  2573. SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
  2574. end;
  2575. p^.Size := Result;
  2576. end;
  2577. function CalcCharChildCount(
  2578. const ASearchStartPos : Integer;
  2579. const ALinePos : Integer;
  2580. const ABookLines : PUCA_LineRec;
  2581. const AMaxLength : Integer;
  2582. const ABookIndex : TUCA_DataBookIndex;
  2583. out ALineCount : Word
  2584. ) : Byte;
  2585. var
  2586. locLinePos : Integer;
  2587. p : PUCA_LineRec;
  2588. procedure IncP();
  2589. begin
  2590. Inc(locLinePos);
  2591. p := @ABookLines[ABookIndex[locLinePos]];
  2592. end;
  2593. var
  2594. i, locTargetLen, locTargetBufferSize, r : Integer;
  2595. locTarget : array[0..127] of Cardinal;
  2596. locLastChar : Cardinal;
  2597. begin
  2598. locLinePos := ALinePos;
  2599. p := @ABookLines[ABookIndex[locLinePos]];
  2600. locTargetLen := ASearchStartPos;
  2601. locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
  2602. Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
  2603. if (Length(p^.CodePoints) = ASearchStartPos) then begin
  2604. r := 0;
  2605. locLastChar := High(Cardinal);
  2606. end else begin
  2607. r := 1;
  2608. locLastChar := p^.CodePoints[ASearchStartPos];
  2609. end;
  2610. i := 1;
  2611. while (i < AMaxLength) do begin
  2612. IncP();
  2613. if (Length(p^.CodePoints) < locTargetLen) then
  2614. Break;
  2615. if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
  2616. Break;
  2617. if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
  2618. Inc(r);
  2619. locLastChar := p^.CodePoints[ASearchStartPos];
  2620. end;
  2621. Inc(i);
  2622. end;
  2623. ALineCount := i;
  2624. Result := r;
  2625. end;
  2626. function BuildTrie(
  2627. const ALinePos : Integer;
  2628. const ABookLines : PUCA_LineRec;
  2629. const AMaxLength : Integer;
  2630. const ABookIndex : TUCA_DataBookIndex
  2631. ) : PTrieNode;
  2632. var
  2633. p : PUCA_LineRec;
  2634. root : PTrieNode;
  2635. ki, k, i : Integer;
  2636. key : array of TKeyType;
  2637. begin
  2638. k := ABookIndex[ALinePos];
  2639. p := @ABookLines[k];
  2640. if (Length(p^.CodePoints) = 1) then
  2641. root := CreateNode(p^.CodePoints[0],k)
  2642. else
  2643. root := CreateNode(p^.CodePoints[0]);
  2644. for i := ALinePos to ALinePos + AMaxLength - 1 do begin
  2645. k := ABookIndex[i];
  2646. p := @ABookLines[k];
  2647. if (Length(p^.CodePoints) = 1) then begin
  2648. InsertWord(root,p^.CodePoints[0],k);
  2649. end else begin
  2650. SetLength(key,Length(p^.CodePoints));
  2651. for ki := 0 to Length(p^.CodePoints) - 1 do
  2652. key[ki] := p^.CodePoints[ki];
  2653. InsertWord(root,key,k);
  2654. end;
  2655. end;
  2656. Result := root;
  2657. end;
  2658. function BoolToByte(AValue : Boolean): Byte;inline;
  2659. begin
  2660. if AValue then
  2661. Result := 1
  2662. else
  2663. Result := 0;
  2664. end;
  2665. function InternalConstructFromTrie(
  2666. const ATrie : PTrieNode;
  2667. const AItem : PUCA_PropItemRec;
  2668. const ALines : PUCA_LineRec;
  2669. const AStoreCp : Boolean
  2670. ) : Cardinal;
  2671. var
  2672. i : Integer;
  2673. size : Cardinal;
  2674. p : PUCA_PropItemRec;
  2675. n : PTrieNode;
  2676. begin
  2677. if (ATrie = nil) then
  2678. exit(0);
  2679. p := AItem;
  2680. n := ATrie;
  2681. if n^.DataNode then
  2682. size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
  2683. else
  2684. size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
  2685. Result := size;
  2686. if (n^.ChildCount > 0) then begin
  2687. for i := 0 to n^.ChildCount - 1 do begin
  2688. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2689. size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
  2690. Result := Result + size;
  2691. end;
  2692. end;
  2693. AItem^.Size := Result;
  2694. end;
  2695. function ConstructFromTrie(
  2696. const ATrie : PTrieNode;
  2697. const AItem : PUCA_PropItemRec;
  2698. const ALines : PUCA_LineRec
  2699. ) : Integer;
  2700. begin
  2701. Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
  2702. end;
  2703. procedure MakeUCA_Props(
  2704. ABook : PUCA_DataBook;
  2705. out AProps : PUCA_PropBook
  2706. );
  2707. var
  2708. propIndexCount : Integer;
  2709. procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
  2710. begin
  2711. AProps^.Index[propIndexCount].CodePoint := ACodePoint;
  2712. AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
  2713. propIndexCount := propIndexCount + 1;
  2714. end;
  2715. var
  2716. locIndex : TUCA_DataBookIndex;
  2717. i, c, k, kc : Integer;
  2718. p, p1, p2 : PUCA_PropItemRec;
  2719. lines, pl1, pl2 : PUCA_LineRec;
  2720. childCount, lineCount : Word;
  2721. size : Cardinal;
  2722. trieRoot : PTrieNode;
  2723. MaxChildCount, MaxSize : Cardinal;
  2724. childList : array of PUCA_PropItemRec;
  2725. begin
  2726. locIndex := CreateIndex(ABook);
  2727. i := Length(ABook^.Lines);
  2728. i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
  2729. AProps := AllocMem(SizeOf(TUCA_DataBook));
  2730. AProps^.ItemSize := i;
  2731. AProps^.Items := AllocMem(i);
  2732. propIndexCount := 0;
  2733. SetLength(AProps^.Index,Length(ABook^.Lines));
  2734. p := AProps^.Items;
  2735. lines := @ABook^.Lines[0];
  2736. c := Length(locIndex);
  2737. i := 0;
  2738. MaxChildCount := 0; MaxSize := 0;
  2739. while (i < (c-1)) do begin
  2740. pl1 := @lines[locIndex[i]];
  2741. if not pl1^.Stored then begin
  2742. i := i + 1;
  2743. Continue;
  2744. end;
  2745. pl2 := @lines[locIndex[i+1]];
  2746. if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
  2747. if (Length(pl1^.CodePoints) = 1) then begin
  2748. size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
  2749. CapturePropIndex(p,pl1^.CodePoints[0]);
  2750. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2751. if (size > MaxSize) then
  2752. MaxSize := size;
  2753. end else begin
  2754. kc := Length(pl1^.CodePoints);
  2755. SetLength(childList,kc);
  2756. for k := 0 to kc - 2 do begin
  2757. size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
  2758. if (k = 0) then
  2759. CapturePropIndex(p,pl1^.CodePoints[k]);
  2760. childList[k] := p;
  2761. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2762. end;
  2763. size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
  2764. childList[kc-1] := p;
  2765. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2766. for k := kc - 2 downto 0 do begin
  2767. p1 := childList[k];
  2768. p2 := childList[k+1];
  2769. p1^.Size := p1^.Size + p2^.Size;
  2770. end;
  2771. if (p1^.Size > MaxSize) then
  2772. MaxSize := p1^.Size;
  2773. end;
  2774. lineCount := 1;
  2775. end else begin
  2776. childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
  2777. if (childCount < 1) then
  2778. raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
  2779. if (lineCount < 2) then
  2780. raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
  2781. if (childCount > MaxChildCount) then
  2782. MaxChildCount := childCount;
  2783. trieRoot := BuildTrie(i,lines,lineCount,locIndex);
  2784. size := ConstructFromTrie(trieRoot,p,lines);
  2785. CapturePropIndex(p,pl1^.CodePoints[0]);
  2786. FreeNode(trieRoot);
  2787. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2788. if (size > MaxSize) then
  2789. MaxSize := size;
  2790. end;
  2791. i := i + lineCount;
  2792. end;
  2793. if (i = (c-1)) then begin
  2794. pl1 := @lines[locIndex[i]];
  2795. if (Length(pl1^.CodePoints) = 1) then begin
  2796. size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
  2797. CapturePropIndex(p,pl1^.CodePoints[0]);
  2798. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2799. if (size > MaxSize) then
  2800. MaxSize := size;
  2801. end else begin
  2802. kc := Length(pl1^.CodePoints);
  2803. SetLength(childList,kc);
  2804. for k := 0 to kc - 2 do begin
  2805. size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
  2806. if (k = 0) then
  2807. CapturePropIndex(p,pl1^.CodePoints[0]);
  2808. childList[k] := p;
  2809. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2810. end;
  2811. size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
  2812. childList[kc-1] := p;
  2813. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2814. for i := kc - 2 downto 0 do begin
  2815. p1 := childList[i];
  2816. p2 := childList[i+1];
  2817. p1^.Size := p1^.Size + p2^.Size;
  2818. end;
  2819. if (size > MaxSize) then
  2820. MaxSize := size;
  2821. end;
  2822. end;
  2823. c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
  2824. ReAllocMem(AProps^.Items,c);
  2825. AProps^.ItemSize := c;
  2826. SetLength(AProps^.Index,propIndexCount);
  2827. AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
  2828. ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
  2829. k := 0;
  2830. c := High(Word);
  2831. for i := 0 to Length(ABook^.Lines) - 1 do begin
  2832. if (Length(ABook^.Lines[i].Weights) > 0) then begin
  2833. if (ABook^.Lines[i].Weights[0].Variable) then begin
  2834. if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
  2835. k := ABook^.Lines[i].Weights[0].Weights[0];
  2836. if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
  2837. c := ABook^.Lines[i].Weights[0].Weights[0];
  2838. end;
  2839. end;
  2840. end;
  2841. AProps^.VariableHighLimit := k;
  2842. AProps^.VariableLowLimit := c;
  2843. end;
  2844. procedure FreeUcaBook(var ABook : PUCA_PropBook);
  2845. var
  2846. p : PUCA_PropBook;
  2847. begin
  2848. if (ABook = nil) then
  2849. exit;
  2850. p := ABook;
  2851. ABook := nil;
  2852. p^.Index := nil;
  2853. FreeMem(p^.Items,p^.ItemSize);
  2854. FreeMem(p,SizeOf(p^));
  2855. end;
  2856. function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
  2857. var
  2858. i : Integer;
  2859. begin
  2860. for i := 0 to Length(APropBook^.Index) - 1 do begin
  2861. if (ACodePoint = APropBook^.Index[i].CodePoint) then
  2862. exit(i);
  2863. end;
  2864. Result := -1;
  2865. end;
  2866. type
  2867. PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
  2868. function IndexOf(
  2869. const AItem : PucaBmpSecondTableItem;
  2870. const ATable : TucaBmpSecondTable;
  2871. const ATableActualLength : Integer
  2872. ) : Integer;overload;
  2873. var
  2874. i : Integer;
  2875. p : PucaBmpSecondTableItem;
  2876. begin
  2877. Result := -1;
  2878. if (ATableActualLength > 0) then begin
  2879. p := @ATable[0];
  2880. for i := 0 to ATableActualLength - 1 do begin
  2881. if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
  2882. Result := i;
  2883. Break;
  2884. end;
  2885. Inc(p);
  2886. end;
  2887. end;
  2888. end;
  2889. procedure MakeUCA_BmpTables(
  2890. var AFirstTable : TucaBmpFirstTable;
  2891. var ASecondTable : TucaBmpSecondTable;
  2892. const APropBook : PUCA_PropBook
  2893. );
  2894. var
  2895. locLowByte, locHighByte : Byte;
  2896. locTableItem : TucaBmpSecondTableItem;
  2897. locCP : TUnicodeCodePoint;
  2898. i, locSecondActualLen : Integer;
  2899. k : Integer;
  2900. begin
  2901. SetLength(ASecondTable,120);
  2902. locSecondActualLen := 0;
  2903. for locHighByte := 0 to 255 do begin
  2904. FillChar(locTableItem,SizeOf(locTableItem),#0);
  2905. for locLowByte := 0 to 255 do begin
  2906. locCP := (locHighByte * 256) + locLowByte;
  2907. k := IndexOf(locCP,APropBook);
  2908. if (k = -1) then
  2909. k := 0
  2910. else
  2911. k := APropBook^.Index[k].Position + 1;
  2912. locTableItem[locLowByte] := k;
  2913. end;
  2914. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  2915. if (i = -1) then begin
  2916. if (locSecondActualLen = Length(ASecondTable)) then
  2917. SetLength(ASecondTable,locSecondActualLen + 50);
  2918. i := locSecondActualLen;
  2919. ASecondTable[i] := locTableItem;
  2920. Inc(locSecondActualLen);
  2921. end;
  2922. AFirstTable[locHighByte] := i;
  2923. end;
  2924. SetLength(ASecondTable,locSecondActualLen);
  2925. end;
  2926. function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
  2927. begin
  2928. //copied from utf16toutf32
  2929. Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
  2930. end;
  2931. procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
  2932. begin
  2933. AHighS := Word((AValue - $10000) shr 10 + $d800);
  2934. ALowS := Word((AValue - $10000) and $3ff + $dc00);
  2935. end;
  2936. type
  2937. PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
  2938. function IndexOf(
  2939. const AItem : PucaOBmpSecondTableItem;
  2940. const ATable : TucaOBmpSecondTable;
  2941. const ATableActualLength : Integer
  2942. ) : Integer;overload;
  2943. var
  2944. i : Integer;
  2945. p : PucaOBmpSecondTableItem;
  2946. begin
  2947. Result := -1;
  2948. if (ATableActualLength > 0) then begin
  2949. p := @ATable[0];
  2950. for i := 0 to ATableActualLength - 1 do begin
  2951. if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
  2952. Result := i;
  2953. Break;
  2954. end;
  2955. Inc(p);
  2956. end;
  2957. end;
  2958. end;
  2959. procedure MakeUCA_OBmpTables(
  2960. var AFirstTable : TucaOBmpFirstTable;
  2961. var ASecondTable : TucaOBmpSecondTable;
  2962. const APropBook : PUCA_PropBook
  2963. );
  2964. var
  2965. locLowByte, locHighByte : Word;
  2966. locTableItem : TucaOBmpSecondTableItem;
  2967. locCP : TUnicodeCodePoint;
  2968. i, locSecondActualLen : Integer;
  2969. k : Integer;
  2970. begin
  2971. if (Length(ASecondTable) = 0) then
  2972. SetLength(ASecondTable,2000);
  2973. locSecondActualLen := 0;
  2974. for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
  2975. FillChar(locTableItem,SizeOf(locTableItem),#0);
  2976. for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
  2977. locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
  2978. k := IndexOf(locCP,APropBook);
  2979. if (k = -1) then
  2980. k := 0
  2981. else
  2982. k := APropBook^.Index[k].Position + 1;
  2983. locTableItem[locLowByte] := k;
  2984. end;
  2985. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  2986. if (i = -1) then begin
  2987. if (locSecondActualLen = Length(ASecondTable)) then
  2988. SetLength(ASecondTable,locSecondActualLen + 50);
  2989. i := locSecondActualLen;
  2990. ASecondTable[i] := locTableItem;
  2991. Inc(locSecondActualLen);
  2992. end;
  2993. AFirstTable[locHighByte] := i;
  2994. end;
  2995. SetLength(ASecondTable,locSecondActualLen);
  2996. end;
  2997. function GetPropPosition(
  2998. const AHighS,
  2999. ALowS : Word;
  3000. const AFirstTable : PucaOBmpFirstTable;
  3001. const ASecondTable : PucaOBmpSecondTable
  3002. ): Integer;inline;overload;
  3003. begin
  3004. Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
  3005. end;
  3006. procedure GenerateUCA_Head(
  3007. ADest : TStream;
  3008. ABook : PUCA_DataBook;
  3009. AProps : PUCA_PropBook
  3010. );
  3011. procedure AddLine(const ALine : ansistring);
  3012. var
  3013. buffer : ansistring;
  3014. begin
  3015. buffer := ALine + sLineBreak;
  3016. ADest.Write(buffer[1],Length(buffer));
  3017. end;
  3018. begin
  3019. AddLine('const');
  3020. AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
  3021. AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
  3022. AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
  3023. AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
  3024. AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
  3025. AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
  3026. AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
  3027. AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
  3028. AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
  3029. AddLine('');
  3030. end;
  3031. procedure GenerateUCA_BmpTables(
  3032. AStream,
  3033. ANativeEndianStream,
  3034. ANonNativeEndianStream : TStream;
  3035. var AFirstTable : TucaBmpFirstTable;
  3036. var ASecondTable : TucaBmpSecondTable
  3037. );
  3038. procedure AddLine(AOut : TStream; const ALine : ansistring);
  3039. var
  3040. buffer : ansistring;
  3041. begin
  3042. buffer := ALine + sLineBreak;
  3043. AOut.Write(buffer[1],Length(buffer));
  3044. end;
  3045. var
  3046. i, j, c : Integer;
  3047. locLine : string;
  3048. value : UInt24;
  3049. begin
  3050. AddLine(AStream,'const');
  3051. AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = (');
  3052. locLine := '';
  3053. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3054. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3055. if (((i+1) mod 16) = 0) then begin
  3056. locLine := ' ' + locLine;
  3057. AddLine(AStream,locLine);
  3058. locLine := '';
  3059. end;
  3060. end;
  3061. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3062. locLine := ' ' + locLine;
  3063. AddLine(AStream,locLine);
  3064. AddLine(AStream,' );' + sLineBreak);
  3065. AddLine(ANativeEndianStream,'const');
  3066. AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3067. c := High(ASecondTable);
  3068. for i := Low(ASecondTable) to c do begin
  3069. locLine := '';
  3070. for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
  3071. value := ASecondTable[i][j];
  3072. locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
  3073. if (((j+1) mod 2) = 0) then begin
  3074. if (i = c) and (j = 255) then
  3075. Delete(locLine,Length(locLine),1);
  3076. locLine := ' ' + locLine;
  3077. AddLine(ANativeEndianStream,locLine);
  3078. locLine := '';
  3079. end;
  3080. end;
  3081. end;
  3082. AddLine(ANativeEndianStream,' );' + sLineBreak);
  3083. AddLine(ANonNativeEndianStream,'const');
  3084. AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3085. c := High(ASecondTable);
  3086. for i := Low(ASecondTable) to c do begin
  3087. locLine := '';
  3088. for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
  3089. value := ASecondTable[i][j];
  3090. locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
  3091. if (((j+1) mod 2) = 0) then begin
  3092. if (i = c) and (j = 255) then
  3093. Delete(locLine,Length(locLine),1);
  3094. locLine := ' ' + locLine;
  3095. AddLine(ANonNativeEndianStream,locLine);
  3096. locLine := '';
  3097. end;
  3098. end;
  3099. end;
  3100. AddLine(ANonNativeEndianStream,' );' + sLineBreak);
  3101. end;
  3102. procedure GenerateBinaryUCA_BmpTables(
  3103. ANativeEndianStream,
  3104. ANonNativeEndianStream : TStream;
  3105. var AFirstTable : TucaBmpFirstTable;
  3106. var ASecondTable : TucaBmpSecondTable
  3107. );
  3108. var
  3109. i, j : Integer;
  3110. value : UInt24;
  3111. begin
  3112. ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
  3113. ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
  3114. for i := Low(ASecondTable) to High(ASecondTable) do begin
  3115. for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
  3116. value := ASecondTable[i][j];
  3117. ANativeEndianStream.Write(value,SizeOf(value));
  3118. ReverseBytes(value,SizeOf(value));
  3119. ANonNativeEndianStream.Write(value,SizeOf(value));
  3120. end;
  3121. end;
  3122. end;
  3123. procedure GenerateUCA_PropTable(
  3124. // WARNING : files must be generated for each endianess (Little / Big)
  3125. ADest : TStream;
  3126. const APropBook : PUCA_PropBook;
  3127. const AEndian : TEndianKind
  3128. );
  3129. procedure AddLine(const ALine : ansistring);
  3130. var
  3131. buffer : ansistring;
  3132. begin
  3133. buffer := ALine + sLineBreak;
  3134. ADest.Write(buffer[1],Length(buffer));
  3135. end;
  3136. var
  3137. i, c : Integer;
  3138. locLine : string;
  3139. p : PByte;
  3140. begin
  3141. c := APropBook^.ItemSize;
  3142. AddLine('const');
  3143. AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
  3144. locLine := '';
  3145. if (AEndian = ENDIAN_NATIVE) then
  3146. p := PByte(APropBook^.Items)
  3147. else
  3148. p := PByte(APropBook^.ItemsOtherEndian);
  3149. for i := 0 to c - 2 do begin
  3150. locLine := locLine + IntToStr(p[i]) + ',';
  3151. if (((i+1) mod 60) = 0) then begin
  3152. locLine := ' ' + locLine;
  3153. AddLine(locLine);
  3154. locLine := '';
  3155. end;
  3156. end;
  3157. locLine := locLine + IntToStr(p[c-1]);
  3158. locLine := ' ' + locLine;
  3159. AddLine(locLine);
  3160. AddLine(' );' + sLineBreak);
  3161. end;
  3162. procedure GenerateBinaryUCA_PropTable(
  3163. // WARNING : files must be generated for each endianess (Little / Big)
  3164. ANativeEndianStream,
  3165. ANonNativeEndianStream : TStream;
  3166. const APropBook : PUCA_PropBook
  3167. );
  3168. begin
  3169. ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
  3170. ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
  3171. end;
  3172. procedure GenerateUCA_OBmpTables(
  3173. AStream,
  3174. ANativeEndianStream,
  3175. ANonNativeEndianStream : TStream;
  3176. var AFirstTable : TucaOBmpFirstTable;
  3177. var ASecondTable : TucaOBmpSecondTable
  3178. );
  3179. procedure AddLine(AOut : TStream; const ALine : ansistring);
  3180. var
  3181. buffer : ansistring;
  3182. begin
  3183. buffer := ALine + sLineBreak;
  3184. AOut.Write(buffer[1],Length(buffer));
  3185. end;
  3186. var
  3187. i, j, c : Integer;
  3188. locLine : string;
  3189. value : UInt24;
  3190. begin
  3191. AddLine(AStream,'const');
  3192. AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
  3193. locLine := '';
  3194. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3195. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3196. if (((i+1) mod 16) = 0) then begin
  3197. locLine := ' ' + locLine;
  3198. AddLine(AStream,locLine);
  3199. locLine := '';
  3200. end;
  3201. end;
  3202. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3203. locLine := ' ' + locLine;
  3204. AddLine(AStream,locLine);
  3205. AddLine(AStream,' );' + sLineBreak);
  3206. AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3207. c := High(ASecondTable);
  3208. for i := Low(ASecondTable) to c do begin
  3209. locLine := '';
  3210. for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
  3211. value := ASecondTable[i][j];
  3212. locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
  3213. if (((j+1) mod 2) = 0) then begin
  3214. if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
  3215. Delete(locLine,Length(locLine),1);
  3216. locLine := ' ' + locLine;
  3217. AddLine(ANativeEndianStream,locLine);
  3218. locLine := '';
  3219. end;
  3220. end;
  3221. end;
  3222. AddLine(ANativeEndianStream,' );' + sLineBreak);
  3223. AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3224. c := High(ASecondTable);
  3225. for i := Low(ASecondTable) to c do begin
  3226. locLine := '';
  3227. for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
  3228. value := ASecondTable[i][j];
  3229. locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
  3230. if (((j+1) mod 2) = 0) then begin
  3231. if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
  3232. Delete(locLine,Length(locLine),1);
  3233. locLine := ' ' + locLine;
  3234. AddLine(ANonNativeEndianStream,locLine);
  3235. locLine := '';
  3236. end;
  3237. end;
  3238. end;
  3239. AddLine(ANonNativeEndianStream,' );' + sLineBreak);
  3240. end;
  3241. procedure GenerateBinaryUCA_OBmpTables(
  3242. ANativeEndianStream,
  3243. ANonNativeEndianStream : TStream;
  3244. var AFirstTable : TucaOBmpFirstTable;
  3245. var ASecondTable : TucaOBmpSecondTable
  3246. );
  3247. var
  3248. i, j : Integer;
  3249. locLine : string;
  3250. wordValue : Word;
  3251. value : UInt24;
  3252. begin
  3253. for i := Low(AFirstTable) to High(AFirstTable) do begin
  3254. wordValue := AFirstTable[i];
  3255. ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
  3256. ReverseBytes(wordValue,SizeOf(wordValue));
  3257. ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
  3258. end;
  3259. for i := Low(ASecondTable) to High(ASecondTable) do begin
  3260. for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
  3261. value := ASecondTable[i][j];
  3262. ANativeEndianStream.Write(value,SizeOf(value));
  3263. ReverseBytes(value,SizeOf(value));
  3264. ANonNativeEndianStream.Write(value,SizeOf(value));
  3265. end;
  3266. end;
  3267. end;
  3268. type
  3269. POBmpSecondTableItem = ^TOBmpSecondTableItem;
  3270. function IndexOf(
  3271. const AItem : POBmpSecondTableItem;
  3272. const ATable : TOBmpSecondTable;
  3273. const ATableActualLength : Integer
  3274. ) : Integer;overload;
  3275. var
  3276. i : Integer;
  3277. p : POBmpSecondTableItem;
  3278. begin
  3279. Result := -1;
  3280. if (ATableActualLength > 0) then begin
  3281. p := @ATable[0];
  3282. for i := 0 to ATableActualLength - 1 do begin
  3283. if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
  3284. Result := i;
  3285. Break;
  3286. end;
  3287. Inc(p);
  3288. end;
  3289. end;
  3290. end;
  3291. procedure MakeOBmpTables(
  3292. var AFirstTable : TOBmpFirstTable;
  3293. var ASecondTable : TOBmpSecondTable;
  3294. const ADataLineList : TDataLineRecArray
  3295. );
  3296. var
  3297. locLowByte, locHighByte : Word;
  3298. locTableItem : TOBmpSecondTableItem;
  3299. locCP : TUnicodeCodePoint;
  3300. i, locSecondActualLen : Integer;
  3301. begin
  3302. SetLength(ASecondTable,2000);
  3303. locSecondActualLen := 0;
  3304. for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
  3305. FillChar(locTableItem,SizeOf(locTableItem),#0);
  3306. for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
  3307. locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
  3308. locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
  3309. end;
  3310. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  3311. if (i = -1) then begin
  3312. if (locSecondActualLen = Length(ASecondTable)) then
  3313. SetLength(ASecondTable,locSecondActualLen + 50);
  3314. i := locSecondActualLen;
  3315. ASecondTable[i] := locTableItem;
  3316. Inc(locSecondActualLen);
  3317. end;
  3318. AFirstTable[locHighByte] := i;
  3319. end;
  3320. SetLength(ASecondTable,locSecondActualLen);
  3321. end;
  3322. type
  3323. P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
  3324. function IndexOf(
  3325. const AItem : P3lvlOBmp3TableItem;
  3326. const ATable : T3lvlOBmp3Table;
  3327. const ATableActualLength : Integer
  3328. ) : Integer;overload;
  3329. var
  3330. i : Integer;
  3331. p : P3lvlOBmp3TableItem;
  3332. begin
  3333. Result := -1;
  3334. if (ATableActualLength > 0) then begin
  3335. p := @ATable[0];
  3336. for i := 0 to ATableActualLength - 1 do begin
  3337. if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
  3338. Result := i;
  3339. Break;
  3340. end;
  3341. Inc(p);
  3342. end;
  3343. end;
  3344. end;
  3345. type
  3346. P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
  3347. function IndexOf(
  3348. const AItem : P3lvlOBmp2TableItem;
  3349. const ATable : T3lvlOBmp2Table
  3350. ) : Integer;overload;
  3351. var
  3352. i : Integer;
  3353. p : P3lvlOBmp2TableItem;
  3354. begin
  3355. Result := -1;
  3356. if (Length(ATable) > 0) then begin
  3357. p := @ATable[0];
  3358. for i := 0 to Length(ATable) - 1 do begin
  3359. if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
  3360. Result := i;
  3361. Break;
  3362. end;
  3363. Inc(p);
  3364. end;
  3365. end;
  3366. end;
  3367. procedure MakeOBmpTables3Levels(
  3368. var AFirstTable : T3lvlOBmp1Table;
  3369. var ASecondTable : T3lvlOBmp2Table;
  3370. var AThirdTable : T3lvlOBmp3Table;
  3371. const ADataLineList : TDataLineRecArray
  3372. );
  3373. var
  3374. locLowByte0, locLowByte1, locHighByte : Word;
  3375. locTableItem2 : T3lvlOBmp2TableItem;
  3376. locTableItem3 : T3lvlOBmp3TableItem;
  3377. locCP : TUnicodeCodePoint;
  3378. i, locThirdActualLen : Integer;
  3379. begin
  3380. SetLength(AThirdTable,120);
  3381. locThirdActualLen := 0;
  3382. for locHighByte := 0 to 1023 do begin
  3383. FillChar(locTableItem2,SizeOf(locTableItem2),#0);
  3384. for locLowByte0 := 0 to 31 do begin
  3385. FillChar(locTableItem3,SizeOf(locTableItem3),#0);
  3386. for locLowByte1 := 0 to 31 do begin
  3387. locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
  3388. locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
  3389. end;
  3390. i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
  3391. if (i = -1) then begin
  3392. if (locThirdActualLen = Length(AThirdTable)) then
  3393. SetLength(AThirdTable,locThirdActualLen + 50);
  3394. i := locThirdActualLen;
  3395. AThirdTable[i] := locTableItem3;
  3396. Inc(locThirdActualLen);
  3397. end;
  3398. locTableItem2[locLowByte0] := i;
  3399. end;
  3400. i := IndexOf(@locTableItem2,ASecondTable);
  3401. if (i = -1) then begin
  3402. i := Length(ASecondTable);
  3403. SetLength(ASecondTable,(i + 1));
  3404. ASecondTable[i] := locTableItem2;
  3405. end;
  3406. AFirstTable[locHighByte] := i;
  3407. end;
  3408. SetLength(AThirdTable,locThirdActualLen);
  3409. end;
  3410. procedure GenerateOBmpTables(
  3411. ADest : TStream;
  3412. var AFirstTable : TOBmpFirstTable;
  3413. var ASecondTable : TOBmpSecondTable
  3414. );
  3415. procedure AddLine(const ALine : ansistring);
  3416. var
  3417. buffer : ansistring;
  3418. begin
  3419. buffer := ALine + sLineBreak;
  3420. ADest.Write(buffer[1],Length(buffer));
  3421. end;
  3422. var
  3423. i, j, c : Integer;
  3424. locLine : string;
  3425. begin
  3426. AddLine('const');
  3427. AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
  3428. locLine := '';
  3429. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3430. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3431. if (((i+1) mod 16) = 0) then begin
  3432. locLine := ' ' + locLine;
  3433. AddLine(locLine);
  3434. locLine := '';
  3435. end;
  3436. end;
  3437. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3438. locLine := ' ' + locLine;
  3439. AddLine(locLine);
  3440. AddLine(' );' + sLineBreak);
  3441. AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
  3442. c := High(ASecondTable);
  3443. for i := Low(ASecondTable) to c do begin
  3444. locLine := '';
  3445. for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
  3446. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  3447. if (((j+1) mod 16) = 0) then begin
  3448. if (i = c) and (j = High(TOBmpSecondTableItem)) then
  3449. Delete(locLine,Length(locLine),1);
  3450. locLine := ' ' + locLine;
  3451. AddLine(locLine);
  3452. locLine := '';
  3453. end;
  3454. end;
  3455. end;
  3456. AddLine(' );' + sLineBreak);
  3457. end;
  3458. //----------------------------------
  3459. procedure Generate3lvlOBmpTables(
  3460. ADest : TStream;
  3461. var AFirstTable : T3lvlOBmp1Table;
  3462. var ASecondTable : T3lvlOBmp2Table;
  3463. var AThirdTable : T3lvlOBmp3Table
  3464. );
  3465. procedure AddLine(const ALine : ansistring);
  3466. var
  3467. buffer : ansistring;
  3468. begin
  3469. buffer := ALine + sLineBreak;
  3470. ADest.Write(buffer[1],Length(buffer));
  3471. end;
  3472. var
  3473. i, j, c : Integer;
  3474. locLine : string;
  3475. begin
  3476. AddLine('const');
  3477. AddLine(' UCO_TABLE_1 : array[0..1023] of Word = (');
  3478. locLine := '';
  3479. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3480. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3481. if (((i+1) mod 16) = 0) then begin
  3482. locLine := ' ' + locLine;
  3483. AddLine(locLine);
  3484. locLine := '';
  3485. end;
  3486. end;
  3487. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3488. locLine := ' ' + locLine;
  3489. AddLine(locLine);
  3490. AddLine(' );' + sLineBreak);
  3491. AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
  3492. c := High(ASecondTable);
  3493. for i := Low(ASecondTable) to c do begin
  3494. locLine := '(';
  3495. for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
  3496. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  3497. Delete(locLine,Length(locLine),1);
  3498. locLine := ' ' + locLine + ')';
  3499. if (i < c) then
  3500. locLine := locLine + ',';
  3501. AddLine(locLine);
  3502. end;
  3503. AddLine(' );' + sLineBreak);
  3504. AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
  3505. c := High(AThirdTable);
  3506. for i := Low(AThirdTable) to c do begin
  3507. locLine := '(';
  3508. for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
  3509. locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
  3510. Delete(locLine,Length(locLine),1);
  3511. locLine := ' ' + locLine + ')';
  3512. if (i < c) then
  3513. locLine := locLine + ',';
  3514. AddLine(locLine);
  3515. end;
  3516. AddLine(' );' + sLineBreak);
  3517. end;
  3518. function GetProp(
  3519. const AHighS,
  3520. ALowS : Word;
  3521. const AProps : TPropRecArray;
  3522. var AFirstTable : TOBmpFirstTable;
  3523. var ASecondTable : TOBmpSecondTable
  3524. ): PPropRec;
  3525. begin
  3526. Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
  3527. end;
  3528. function GetProp(
  3529. const AHighS,
  3530. ALowS : Word;
  3531. const AProps : TPropRecArray;
  3532. var AFirstTable : T3lvlOBmp1Table;
  3533. var ASecondTable : T3lvlOBmp2Table;
  3534. var AThirdTable : T3lvlOBmp3Table
  3535. ): PPropRec;
  3536. begin
  3537. Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
  3538. //Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
  3539. end;
  3540. { TUCA_PropItemContextTreeRec }
  3541. function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
  3542. begin
  3543. if (Size = 0) then
  3544. Result := nil
  3545. else
  3546. Result := PUCA_PropItemContextTreeNodeRec(
  3547. PtrUInt(
  3548. PtrUInt(@Self) + SizeOf(UInt24){Size}
  3549. )
  3550. );
  3551. end;
  3552. { TUCA_LineContextRec }
  3553. procedure TUCA_LineContextRec.Clear;
  3554. begin
  3555. Data := nil
  3556. end;
  3557. procedure TUCA_LineContextRec.Assign(ASource : TUCA_LineContextRec);
  3558. var
  3559. c, i : Integer;
  3560. begin
  3561. c := Length(ASource.Data);
  3562. SetLength(Self.Data,c);
  3563. for i := 0 to c-1 do
  3564. Self.Data[i].Assign(ASource.Data[i]);
  3565. end;
  3566. function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
  3567. begin
  3568. Result.Clear();
  3569. Result.Assign(Self);
  3570. end;
  3571. { TUCA_LineContextItemRec }
  3572. procedure TUCA_LineContextItemRec.Clear();
  3573. begin
  3574. CodePoints := nil;
  3575. Weights := nil;
  3576. end;
  3577. procedure TUCA_LineContextItemRec.Assign(ASource : TUCA_LineContextItemRec);
  3578. begin
  3579. Self.CodePoints := Copy(ASource.CodePoints);
  3580. Self.Weights := Copy(ASource.Weights);
  3581. end;
  3582. function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
  3583. begin
  3584. Result.Clear();
  3585. Result.Assign(Self);
  3586. end;
  3587. { TUCA_LineRec }
  3588. procedure TUCA_LineRec.Clear;
  3589. begin
  3590. CodePoints := nil;
  3591. Weights := nil;
  3592. Deleted := False;
  3593. Stored := False;
  3594. Context.Clear();
  3595. end;
  3596. procedure TUCA_LineRec.Assign(ASource : TUCA_LineRec);
  3597. begin
  3598. Self.CodePoints := Copy(ASource.CodePoints);
  3599. Self.Weights := Copy(ASource.Weights);
  3600. Self.Deleted := ASource.Deleted;
  3601. Self.Stored := ASource.Stored;
  3602. Self.Context.Assign(ASource.Context);
  3603. end;
  3604. function TUCA_LineRec.Clone : TUCA_LineRec;
  3605. begin
  3606. Result.Clear();
  3607. Result.Assign(Self);
  3608. end;
  3609. function TUCA_LineRec.HasContext() : Boolean;
  3610. begin
  3611. Result := (Length(Context.Data) > 0);
  3612. end;
  3613. { TPropRec }
  3614. function TPropRec.GetCategory: TUnicodeCategory;
  3615. begin
  3616. Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
  3617. end;
  3618. procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
  3619. var
  3620. b : Byte;
  3621. begin
  3622. b := Ord(AValue);
  3623. b := b shl 3;
  3624. CategoryData := CategoryData or b;
  3625. //CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
  3626. end;
  3627. function TPropRec.GetWhiteSpace: Boolean;
  3628. begin
  3629. Result := IsBitON(CategoryData,0);
  3630. end;
  3631. procedure TPropRec.SetWhiteSpace(AValue: Boolean);
  3632. begin
  3633. SetBit(CategoryData,0,AValue);
  3634. end;
  3635. function TPropRec.GetHangulSyllable: Boolean;
  3636. begin
  3637. Result := IsBitON(CategoryData,1);
  3638. end;
  3639. procedure TPropRec.SetHangulSyllable(AValue: Boolean);
  3640. begin
  3641. SetBit(CategoryData,1,AValue);
  3642. end;
  3643. { TUCA_PropItemRec }
  3644. function TUCA_PropItemRec.GetWeightSize : Word;
  3645. var
  3646. c : Integer;
  3647. begin
  3648. c := WeightLength;
  3649. if (c = 0) then
  3650. exit(0);
  3651. Result := c*SizeOf(TUCA_PropWeights);
  3652. if IsWeightCompress_1() then
  3653. Result := Result - 1;
  3654. if IsWeightCompress_2() then
  3655. Result := Result - 1;
  3656. end;
  3657. function TUCA_PropItemRec.HasCodePoint(): Boolean;
  3658. begin
  3659. Result := IsBitON(Flags,FLAG_CODEPOINT);
  3660. end;
  3661. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  3662. var
  3663. c : Integer;
  3664. p : PByte;
  3665. pd : PUCA_PropWeights;
  3666. begin
  3667. c := WeightLength;
  3668. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  3669. pd := ADest;
  3670. pd^.Weights[0] := PWord(p)^;
  3671. p := p + 2;
  3672. if not IsWeightCompress_1() then begin
  3673. pd^.Weights[1] := PWord(p)^;
  3674. p := p + 2;
  3675. end else begin
  3676. pd^.Weights[1] := p^;
  3677. p := p + 1;
  3678. end;
  3679. if not IsWeightCompress_2() then begin
  3680. pd^.Weights[2] := PWord(p)^;
  3681. p := p + 2;
  3682. end else begin
  3683. pd^.Weights[2] := p^;
  3684. p := p + 1;
  3685. end;
  3686. if (c > 1) then
  3687. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  3688. end;
  3689. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  3690. begin
  3691. Result := SizeOf(TUCA_PropItemRec);
  3692. if (WeightLength > 0) then begin
  3693. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  3694. if IsWeightCompress_1() then
  3695. Result := Result - 1;
  3696. if IsWeightCompress_2() then
  3697. Result := Result - 1;
  3698. end;
  3699. if HasCodePoint() then
  3700. Result := Result + SizeOf(UInt24);
  3701. if Contextual then
  3702. Result := Result + Cardinal(GetContext()^.Size);
  3703. end;
  3704. procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
  3705. begin
  3706. SetBit(Flags,FLAG_CONTEXTUAL,AValue);
  3707. end;
  3708. function TUCA_PropItemRec.GetContextual : Boolean;
  3709. begin
  3710. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  3711. end;
  3712. function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
  3713. var
  3714. p : PtrUInt;
  3715. begin
  3716. if not Contextual then
  3717. exit(nil);
  3718. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  3719. if IsBitON(Flags,FLAG_CODEPOINT) then
  3720. p := p + SizeOf(UInt24);
  3721. Result := PUCA_PropItemContextTreeRec(p);
  3722. end;
  3723. procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
  3724. begin
  3725. SetBit(Flags,FLAG_DELETION,AValue);
  3726. end;
  3727. function TUCA_PropItemRec.IsDeleted: Boolean;
  3728. begin
  3729. Result := IsBitON(Flags,FLAG_DELETION);
  3730. end;
  3731. function TUCA_PropItemRec.IsValid() : Boolean;
  3732. begin
  3733. Result := IsBitON(Flags,FLAG_VALID);
  3734. end;
  3735. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  3736. begin
  3737. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  3738. end;
  3739. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  3740. begin
  3741. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  3742. end;
  3743. function TUCA_PropItemRec.GetCodePoint: UInt24;
  3744. begin
  3745. if HasCodePoint() then begin
  3746. if Contextual then
  3747. Result := PUInt24(
  3748. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  3749. Cardinal(GetContext()^.Size)
  3750. )^
  3751. else
  3752. Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
  3753. end else begin
  3754. raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  3755. end
  3756. end;
  3757. function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
  3758. var
  3759. a, b : PUCA_LineContextItemRec;
  3760. i, hb : Integer;
  3761. begin
  3762. if (Item1 = Item2) then
  3763. exit(0);
  3764. if (Item1 = nil) then
  3765. exit(-1);
  3766. if (Item2 = nil) then
  3767. exit(1);
  3768. a := Item1;
  3769. b := Item2;
  3770. if (a^.CodePoints = b^.CodePoints) then
  3771. exit(0);
  3772. Result := 1;
  3773. hb := Length(b^.CodePoints) - 1;
  3774. for i := 0 to Length(a^.CodePoints) - 1 do begin
  3775. if (i > hb) then
  3776. exit;
  3777. if (a^.CodePoints[i] < b^.CodePoints[i]) then
  3778. exit(-1);
  3779. if (a^.CodePoints[i] > b^.CodePoints[i]) then
  3780. exit(1);
  3781. end;
  3782. if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
  3783. exit(0);
  3784. exit(-1);
  3785. end;
  3786. function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
  3787. var
  3788. r : TAVLTree;
  3789. i : Integer;
  3790. begin
  3791. r := TAVLTree.Create(@avl_CompareCodePoints);
  3792. try
  3793. for i := 0 to Length(AContext^.Data) - 1 do
  3794. r.Add(@AContext^.Data[i]);
  3795. Result := r;
  3796. except
  3797. FreeAndNil(r);
  3798. raise;
  3799. end;
  3800. end;
  3801. function ConstructContextTree(
  3802. const AContext : PUCA_LineContextRec;
  3803. var ADestBuffer;
  3804. const ADestBufferLength : Cardinal
  3805. ) : PUCA_PropItemContextTreeRec;
  3806. function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
  3807. var
  3808. kitem : PUCA_LineContextItemRec;
  3809. begin
  3810. if (AItem = nil) then
  3811. exit(0);
  3812. kitem := AItem.Data;
  3813. Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
  3814. SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
  3815. SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
  3816. (Length(kitem^.CodePoints)*SizeOf(UInt24)) +
  3817. SizeOf(PUCA_PropItemContextRec^.WeightCount) +
  3818. (Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
  3819. end;
  3820. function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
  3821. begin
  3822. if (AItem = nil) then
  3823. exit(0);
  3824. Result := CalcItemOnlySize(AItem);
  3825. if (AItem.Left <> nil) then
  3826. Result := Result + CalcItemSize(AItem.Left);
  3827. if (AItem.Right <> nil) then
  3828. Result := Result + CalcItemSize(AItem.Right);
  3829. end;
  3830. function CalcSize(AData : TAVLTree) : Cardinal;
  3831. begin
  3832. Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
  3833. end;
  3834. function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
  3835. var
  3836. k : Integer;
  3837. kitem : PUCA_LineContextItemRec;
  3838. kpcp : PUInt24;
  3839. kpw : PUCA_PropWeights;
  3840. pextra : PtrUInt;
  3841. pnext : PUCA_PropItemContextTreeNodeRec;
  3842. begin
  3843. kitem := ASource.Data;
  3844. ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
  3845. ADest^.Data.WeightCount := Length(kitem^.Weights);
  3846. pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
  3847. SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
  3848. if (ADest^.Data.CodePointCount > 0) then begin
  3849. kpcp := PUInt24(pextra);
  3850. for k := 0 to ADest^.Data.CodePointCount - 1 do begin
  3851. kpcp^ := kitem^.CodePoints[k];
  3852. Inc(kpcp);
  3853. end;
  3854. end;
  3855. if (ADest^.Data.WeightCount > 0) then begin
  3856. kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
  3857. for k := 0 to ADest^.Data.WeightCount - 1 do begin
  3858. kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
  3859. kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
  3860. kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
  3861. Inc(kpw);
  3862. end;
  3863. end;
  3864. Result := CalcItemOnlySize(ASource);
  3865. if (ASource.Left <> nil) then begin
  3866. pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
  3867. ADest^.Left := Result;
  3868. Result := Result + ConstructItem(ASource.Left,pnext);
  3869. end else begin
  3870. ADest^.Left := 0;
  3871. end;
  3872. if (ASource.Right <> nil) then begin
  3873. pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
  3874. ADest^.Right := Result;
  3875. Result := Result + ConstructItem(ASource.Right,pnext);
  3876. end else begin
  3877. ADest^.Right := 0;
  3878. end;
  3879. end;
  3880. var
  3881. c : PtrUInt;
  3882. r : PUCA_PropItemContextTreeRec;
  3883. p : PUCA_PropItemContextTreeNodeRec;
  3884. tempTree : TAVLTree;
  3885. begin
  3886. tempTree := ConstructAvlContextTree(AContext);
  3887. try
  3888. c := CalcSize(tempTree);
  3889. if (ADestBufferLength > 0) and (c > ADestBufferLength) then
  3890. raise Exception.Create(SInsufficientMemoryBuffer);
  3891. r := @ADestBuffer;
  3892. r^.Size := c;
  3893. p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
  3894. ConstructItem(tempTree.Root,p);
  3895. finally
  3896. tempTree.Free();
  3897. end;
  3898. Result := r;
  3899. end;
  3900. procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
  3901. begin
  3902. ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
  3903. ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
  3904. ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
  3905. ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
  3906. ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
  3907. ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
  3908. ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
  3909. end;
  3910. procedure ReverseBytes(var AData; const ALength : Integer);
  3911. var
  3912. i,j : PtrInt;
  3913. c : Byte;
  3914. p : PByte;
  3915. begin
  3916. if (ALength = 1) then
  3917. exit;
  3918. p := @AData;
  3919. j := ALength div 2;
  3920. for i := 0 to Pred(j) do begin
  3921. c := p[i];
  3922. p[i] := p[(ALength - 1 ) - i];
  3923. p[(ALength - 1 ) - i] := c;
  3924. end;
  3925. end;
  3926. procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
  3927. var
  3928. p : PByte;
  3929. i : PtrInt;
  3930. begin
  3931. if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
  3932. p := @AValue;
  3933. for i := 0 to Pred(AArrayLength) do begin
  3934. ReverseBytes(p^,AItemSize);
  3935. Inc(p,AItemSize);
  3936. end;
  3937. end;
  3938. end;
  3939. procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
  3940. var
  3941. k : PtrUInt;
  3942. p_s, p_d : PByte;
  3943. begin
  3944. d^.Left := s^.Left;
  3945. ReverseBytes(d^.Left,SizeOf(d^.Left));
  3946. d^.Right := s^.Right;
  3947. ReverseBytes(d^.Right,SizeOf(d^.Right));
  3948. d^.Data.CodePointCount := s^.Data.CodePointCount;
  3949. ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
  3950. d^.Data.WeightCount := s^.Data.WeightCount;
  3951. ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
  3952. k := SizeOf(TUCA_PropItemContextTreeNodeRec);
  3953. p_s := PByte(PtrUInt(s) + k);
  3954. p_d := PByte(PtrUInt(d) + k);
  3955. k := (s^.Data.CodePointCount*SizeOf(UInt24));
  3956. Move(p_s^,p_d^, k);
  3957. ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
  3958. p_s := PByte(PtrUInt(p_s) + k);
  3959. p_d := PByte(PtrUInt(p_d) + k);
  3960. k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
  3961. Move(p_s^,p_d^,k);
  3962. ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
  3963. if (s^.Left > 0) then
  3964. ReverseContextNodeFromNativeEndian(
  3965. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
  3966. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
  3967. );
  3968. if (s^.Right > 0) then
  3969. ReverseContextNodeFromNativeEndian(
  3970. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
  3971. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
  3972. );
  3973. end;
  3974. procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
  3975. var
  3976. k : PtrUInt;
  3977. begin
  3978. d^.Size := s^.Size;
  3979. ReverseBytes(d^.Size,SizeOf(d^.Size));
  3980. if (s^.Size = 0) then
  3981. exit;
  3982. k := SizeOf(s^.Size);
  3983. ReverseContextNodeFromNativeEndian(
  3984. PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
  3985. PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
  3986. );
  3987. end;
  3988. procedure ReverseFromNativeEndian(
  3989. const AData : PUCA_PropItemRec;
  3990. const ADataLen : Cardinal;
  3991. const ADest : PUCA_PropItemRec
  3992. );
  3993. var
  3994. s, d : PUCA_PropItemRec;
  3995. sCtx, dCtx : PUCA_PropItemContextTreeRec;
  3996. dataEnd : PtrUInt;
  3997. k, i : PtrUInt;
  3998. p_s, p_d : PByte;
  3999. pw_s, pw_d : PUCA_PropWeights;
  4000. begin
  4001. dataEnd := PtrUInt(AData) + ADataLen;
  4002. s := AData;
  4003. d := ADest;
  4004. while True do begin
  4005. d^.WeightLength := s^.WeightLength;
  4006. ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
  4007. d^.ChildCount := s^.ChildCount;
  4008. ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
  4009. d^.Size := s^.Size;
  4010. ReverseBytes(d^.Size,SizeOf(d^.Size));
  4011. d^.Flags := s^.Flags;
  4012. ReverseBytes(d^.Flags,SizeOf(d^.Flags));
  4013. if s^.Contextual then begin
  4014. k := SizeOf(TUCA_PropItemRec);
  4015. if s^.HasCodePoint() then
  4016. k := k + SizeOf(UInt24);
  4017. sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
  4018. dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
  4019. ReverseContextFromNativeEndian(sCtx,dCtx);
  4020. end;
  4021. if s^.HasCodePoint() then begin
  4022. if s^.Contextual then
  4023. k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
  4024. else
  4025. k := s^.GetSelfOnlySize() - SizeOf(UInt24);
  4026. p_s := PByte(PtrUInt(s) + k);
  4027. p_d := PByte(PtrUInt(d) + k);
  4028. Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
  4029. ReverseBytes(p_d^,SizeOf(UInt24));
  4030. end;
  4031. if (s^.WeightLength > 0) then begin
  4032. k := SizeOf(TUCA_PropItemRec);
  4033. p_s := PByte(PtrUInt(s) + k);
  4034. p_d := PByte(PtrUInt(d) + k);
  4035. k := SizeOf(Word);
  4036. Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
  4037. ReverseBytes(Unaligned(p_d^),k);
  4038. p_s := PByte(PtrUInt(p_s) + k);
  4039. p_d := PByte(PtrUInt(p_d) + k);
  4040. if s^.IsWeightCompress_1() then begin
  4041. k := SizeOf(Byte);
  4042. PByte(p_d)^ := PByte(p_s)^;
  4043. end else begin
  4044. k := SizeOf(Word);
  4045. Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
  4046. end;
  4047. ReverseBytes(p_d^,k);
  4048. p_s := PByte(PtrUInt(p_s) + k);
  4049. p_d := PByte(PtrUInt(p_d) + k);
  4050. if s^.IsWeightCompress_2() then begin
  4051. k := SizeOf(Byte);
  4052. PByte(p_d)^ := PByte(p_s)^;
  4053. end else begin
  4054. k := SizeOf(Word);
  4055. Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
  4056. end;
  4057. ReverseBytes(p_d^,k);
  4058. if (s^.WeightLength > 1) then begin
  4059. pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
  4060. pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
  4061. for i := 1 to s^.WeightLength - 1 do begin
  4062. pw_d^.Weights[0] := pw_s^.Weights[0];
  4063. pw_d^.Weights[1] := pw_s^.Weights[1];
  4064. pw_d^.Weights[2] := pw_s^.Weights[2];
  4065. ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
  4066. Inc(pw_s);
  4067. Inc(pw_d);
  4068. end;
  4069. end;
  4070. end;
  4071. k := s^.GetSelfOnlySize();
  4072. s := PUCA_PropItemRec(PtrUInt(s)+k);
  4073. d := PUCA_PropItemRec(PtrUInt(d)+k);
  4074. if (PtrUInt(s) >= dataEnd) then
  4075. Break;
  4076. end;
  4077. if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
  4078. raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
  4079. end;
  4080. //------------------------------------------------------------------------------
  4081. procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
  4082. var
  4083. k : PtrUInt;
  4084. p_s, p_d : PByte;
  4085. begin
  4086. d^.Left := s^.Left;
  4087. ReverseBytes(d^.Left,SizeOf(d^.Left));
  4088. d^.Right := s^.Right;
  4089. ReverseBytes(d^.Right,SizeOf(d^.Right));
  4090. d^.Data.CodePointCount := s^.Data.CodePointCount;
  4091. ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
  4092. d^.Data.WeightCount := s^.Data.WeightCount;
  4093. ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
  4094. k := SizeOf(TUCA_PropItemContextTreeNodeRec);
  4095. p_s := PByte(PtrUInt(s) + k);
  4096. p_d := PByte(PtrUInt(d) + k);
  4097. k := (d^.Data.CodePointCount*SizeOf(UInt24));
  4098. Move(p_s^,p_d^, k);
  4099. ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
  4100. p_s := PByte(PtrUInt(p_s) + k);
  4101. p_d := PByte(PtrUInt(p_d) + k);
  4102. k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
  4103. Move(p_s^,p_d^,k);
  4104. ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
  4105. if (d^.Left > 0) then
  4106. ReverseContextNodeToNativeEndian(
  4107. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
  4108. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
  4109. );
  4110. if (d^.Right > 0) then
  4111. ReverseContextNodeToNativeEndian(
  4112. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
  4113. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
  4114. );
  4115. end;
  4116. procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
  4117. var
  4118. k : PtrUInt;
  4119. begin
  4120. d^.Size := s^.Size;
  4121. ReverseBytes(d^.Size,SizeOf(d^.Size));
  4122. if (s^.Size = 0) then
  4123. exit;
  4124. k := SizeOf(s^.Size);
  4125. ReverseContextNodeToNativeEndian(
  4126. PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
  4127. PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
  4128. );
  4129. end;
  4130. procedure ReverseToNativeEndian(
  4131. const AData : PUCA_PropItemRec;
  4132. const ADataLen : Cardinal;
  4133. const ADest : PUCA_PropItemRec
  4134. );
  4135. var
  4136. s, d : PUCA_PropItemRec;
  4137. sCtx, dCtx : PUCA_PropItemContextTreeRec;
  4138. dataEnd : PtrUInt;
  4139. k, i : PtrUInt;
  4140. p_s, p_d : PByte;
  4141. pw_s, pw_d : PUCA_PropWeights;
  4142. begin
  4143. dataEnd := PtrUInt(AData) + ADataLen;
  4144. s := AData;
  4145. d := ADest;
  4146. while True do begin
  4147. d^.WeightLength := s^.WeightLength;
  4148. ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
  4149. d^.ChildCount := s^.ChildCount;
  4150. ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
  4151. d^.Size := s^.Size;
  4152. ReverseBytes(d^.Size,SizeOf(d^.Size));
  4153. d^.Flags := s^.Flags;
  4154. ReverseBytes(d^.Flags,SizeOf(d^.Flags));
  4155. if d^.Contextual then begin
  4156. k := SizeOf(TUCA_PropItemRec);
  4157. if d^.HasCodePoint() then
  4158. k := k + SizeOf(UInt24);
  4159. sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
  4160. dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
  4161. ReverseContextToNativeEndian(sCtx,dCtx);
  4162. end;
  4163. if d^.HasCodePoint() then begin
  4164. if d^.Contextual then
  4165. k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
  4166. else
  4167. k := d^.GetSelfOnlySize() - SizeOf(UInt24);
  4168. p_s := PByte(PtrUInt(s) + k);
  4169. p_d := PByte(PtrUInt(d) + k);
  4170. Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
  4171. ReverseBytes(p_d^,SizeOf(UInt24));
  4172. end;
  4173. if (d^.WeightLength > 0) then begin
  4174. k := SizeOf(TUCA_PropItemRec);
  4175. p_s := PByte(PtrUInt(s) + k);
  4176. p_d := PByte(PtrUInt(d) + k);
  4177. k := SizeOf(Word);
  4178. Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
  4179. ReverseBytes(p_d^,k);
  4180. p_s := PByte(PtrUInt(p_s) + k);
  4181. p_d := PByte(PtrUInt(p_d) + k);
  4182. if d^.IsWeightCompress_1() then begin
  4183. k := SizeOf(Byte);
  4184. PByte(p_d)^ := PByte(p_s)^;
  4185. end else begin
  4186. k := SizeOf(Word);
  4187. Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
  4188. end;
  4189. ReverseBytes(p_d^,k);
  4190. p_s := PByte(PtrUInt(p_s) + k);
  4191. p_d := PByte(PtrUInt(p_d) + k);
  4192. if d^.IsWeightCompress_2() then begin
  4193. k := SizeOf(Byte);
  4194. PByte(p_d)^ := PByte(p_s)^;
  4195. end else begin
  4196. k := SizeOf(Word);
  4197. Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
  4198. end;
  4199. ReverseBytes(p_d^,k);
  4200. if (d^.WeightLength > 1) then begin
  4201. pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
  4202. pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
  4203. for i := 1 to d^.WeightLength - 1 do begin
  4204. pw_d^.Weights[0] := pw_s^.Weights[0];
  4205. pw_d^.Weights[1] := pw_s^.Weights[1];
  4206. pw_d^.Weights[2] := pw_s^.Weights[2];
  4207. ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
  4208. Inc(pw_s);
  4209. Inc(pw_d);
  4210. end;
  4211. end;
  4212. end;
  4213. k := d^.GetSelfOnlySize();
  4214. s := PUCA_PropItemRec(PtrUInt(s)+k);
  4215. d := PUCA_PropItemRec(PtrUInt(d)+k);
  4216. if (PtrUInt(s) >= dataEnd) then
  4217. Break;
  4218. end;
  4219. if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
  4220. raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
  4221. end;
  4222. procedure Check(const ACondition : Boolean; const AMsg : string);overload;
  4223. begin
  4224. if not ACondition then
  4225. raise Exception.Create(AMsg);
  4226. end;
  4227. procedure Check(
  4228. const ACondition : Boolean;
  4229. const AFormatMsg : string;
  4230. const AArgs : array of const
  4231. );overload;
  4232. begin
  4233. Check(ACondition,Format(AFormatMsg,AArgs));
  4234. end;
  4235. procedure Check(const ACondition : Boolean);overload;
  4236. begin
  4237. Check(ACondition,'Check failed.')
  4238. end;
  4239. procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
  4240. var
  4241. i : Integer;
  4242. begin
  4243. if (ALength > 0) then begin
  4244. for i := 0 to ALength - 1 do begin
  4245. Check(a[i].Weights[0]=b[i].Weights[0]);
  4246. Check(a[i].Weights[1]=b[i].Weights[1]);
  4247. Check(a[i].Weights[2]=b[i].Weights[2]);
  4248. end;
  4249. end;
  4250. end;
  4251. procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
  4252. var
  4253. i : Integer;
  4254. begin
  4255. if (ALength > 0) then begin
  4256. for i := 0 to ALength - 1 do
  4257. Check(a[i]=b[i]);
  4258. end;
  4259. end;
  4260. procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
  4261. var
  4262. a, b : PUCA_PropItemContextTreeNodeRec;
  4263. k : Cardinal;
  4264. begin
  4265. if (AProp1=nil) then begin
  4266. Check(AProp2=nil);
  4267. exit;
  4268. end;
  4269. a := AProp1;
  4270. b := AProp2;
  4271. Check(a^.Left=b^.Left);
  4272. Check(a^.Right=b^.Right);
  4273. Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
  4274. Check(a^.Data.WeightCount=b^.Data.WeightCount);
  4275. k := SizeOf(a^.Data);
  4276. CompareCodePoints(
  4277. PUInt24(PtrUInt(a)+k),
  4278. PUInt24(PtrUInt(b)+k),
  4279. a^.Data.CodePointCount
  4280. );
  4281. k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
  4282. CompareWeights(
  4283. PUCA_PropWeights(PtrUInt(a)+k),
  4284. PUCA_PropWeights(PtrUInt(b)+k),
  4285. a^.Data.WeightCount
  4286. );
  4287. if (a^.Left > 0) then begin
  4288. k := a^.Left;
  4289. CompareContextNode(
  4290. PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
  4291. PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
  4292. );
  4293. end;
  4294. if (a^.Right > 0) then begin
  4295. k := a^.Right;
  4296. CompareContextNode(
  4297. PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
  4298. PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
  4299. );
  4300. end;
  4301. end;
  4302. procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
  4303. var
  4304. a, b : PUCA_PropItemContextTreeNodeRec;
  4305. k : Integer;
  4306. begin
  4307. if (AProp1=nil) then begin
  4308. Check(AProp2=nil);
  4309. exit;
  4310. end;
  4311. Check(AProp1^.Size=AProp2^.Size);
  4312. k := Cardinal(AProp1^.Size);
  4313. a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
  4314. b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
  4315. CompareContextNode(a,b);
  4316. end;
  4317. procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
  4318. var
  4319. a, b, pend : PUCA_PropItemRec;
  4320. wa, wb : array of TUCA_PropWeights;
  4321. k : Integer;
  4322. begin
  4323. if (ADataLen <= 0) then
  4324. exit;
  4325. a := PUCA_PropItemRec(AProp1);
  4326. b := PUCA_PropItemRec(AProp2);
  4327. pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
  4328. while (a<pend) do begin
  4329. Check(a^.WeightLength=b^.WeightLength);
  4330. Check(a^.ChildCount=b^.ChildCount);
  4331. Check(a^.Size=b^.Size);
  4332. Check(a^.Flags=b^.Flags);
  4333. if a^.HasCodePoint() then
  4334. Check(a^.CodePoint = b^.CodePoint);
  4335. if (a^.WeightLength > 0) then begin
  4336. k := a^.WeightLength;
  4337. SetLength(wa,k);
  4338. SetLength(wb,k);
  4339. a^.GetWeightArray(@wa[0]);
  4340. b^.GetWeightArray(@wb[0]);
  4341. CompareWeights(@wa[0],@wb[0],k);
  4342. end;
  4343. if a^.Contextual then
  4344. CompareContext(a^.GetContext(),b^.GetContext());
  4345. Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
  4346. k := a^.GetSelfOnlySize();
  4347. a := PUCA_PropItemRec(PtrUInt(a)+k);
  4348. b := PUCA_PropItemRec(PtrUInt(b)+k);
  4349. end;
  4350. end;
  4351. initialization
  4352. FS := DefaultFormatSettings;
  4353. FS.DecimalSeparator := '.';
  4354. end.