helper.pas 130 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562
  1. { Unicode parser helper unit.
  2. Copyright (c) 2012 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
  15. unit helper;
  16. {$mode delphi}
  17. {$H+}
  18. {$PACKENUM 1}
  19. {$pointermath on}
  20. {$typedaddress on}
  21. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  22. interface
  23. uses
  24. Classes, SysUtils, StrUtils;
  25. const
  26. SLicenseText =
  27. ' { Unicode implementation tables. ' + sLineBreak +
  28. ' ' + sLineBreak +
  29. ' Copyright (c) 2013 by Inoussa OUEDRAOGO ' + sLineBreak +
  30. ' ' + sLineBreak +
  31. ' Permission is hereby granted, free of charge, to any person ' + sLineBreak +
  32. ' obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
  33. ' documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
  34. ' associated documentation (the "Software") to deal in the Data ' + sLineBreak +
  35. ' Files or Software without restriction, including without ' + sLineBreak +
  36. ' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
  37. ' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
  38. ' and to permit persons to whom the Data Files or Software are ' + sLineBreak +
  39. ' furnished to do so, provided that (a) the above copyright ' + sLineBreak +
  40. ' notice(s) and this permission notice appear with all copies ' + sLineBreak +
  41. ' of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
  42. ' notice(s) and this permission notice appear in associated ' + sLineBreak +
  43. ' documentation, and (c) there is clear notice in each modified ' + sLineBreak +
  44. ' Data File or in the Software as well as in the documentation ' + sLineBreak +
  45. ' associated with the Data File(s) or Software that the data or ' + sLineBreak +
  46. ' software has been modified. ' + sLineBreak +
  47. ' ' + sLineBreak +
  48. ' ' + sLineBreak +
  49. ' This program is distributed in the hope that it will be useful, ' + sLineBreak +
  50. ' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
  51. ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
  52. type
  53. // Unicode General Category
  54. TUnicodeCategory = (
  55. ucUppercaseLetter, // Lu = Letter, uppercase
  56. ucLowercaseLetter, // Ll = Letter, lowercase
  57. ucTitlecaseLetter, // Lt = Letter, titlecase
  58. ucModifierLetter, // Lm = Letter, modifier
  59. ucOtherLetter, // Lo = Letter, other
  60. ucNonSpacingMark, // Mn = Mark, nonspacing
  61. ucCombiningMark, // Mc = Mark, spacing combining
  62. ucEnclosingMark, // Me = Mark, enclosing
  63. ucDecimalNumber, // Nd = Number, decimal digit
  64. ucLetterNumber, // Nl = Number, letter
  65. ucOtherNumber, // No = Number, other
  66. ucConnectPunctuation, // Pc = Punctuation, connector
  67. ucDashPunctuation, // Pd = Punctuation, dash
  68. ucOpenPunctuation, // Ps = Punctuation, open
  69. ucClosePunctuation, // Pe = Punctuation, close
  70. ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
  71. ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
  72. ucOtherPunctuation, // Po = Punctuation, other
  73. ucMathSymbol, // Sm = Symbol, math
  74. ucCurrencySymbol, // Sc = Symbol, currency
  75. ucModifierSymbol, // Sk = Symbol, modifier
  76. ucOtherSymbol, // So = Symbol, other
  77. ucSpaceSeparator, // Zs = Separator, space
  78. ucLineSeparator, // Zl = Separator, line
  79. ucParagraphSeparator, // Zp = Separator, paragraph
  80. ucControl, // Cc = Other, control
  81. ucFormat, // Cf = Other, format
  82. ucSurrogate, // Cs = Other, surrogate
  83. ucPrivateUse, // Co = Other, private use
  84. ucUnassigned // Cn = Other, not assigned (including noncharacters)
  85. );
  86. TUInt24Rec = packed record
  87. public
  88. {$ifdef FPC_LITTLE_ENDIAN}
  89. byte0, byte1, byte2 : Byte;
  90. {$else FPC_LITTLE_ENDIAN}
  91. byte2, byte1, byte0 : Byte;
  92. {$endif FPC_LITTLE_ENDIAN}
  93. public
  94. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  95. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  96. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  97. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  98. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  99. class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  100. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  101. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  102. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  103. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  104. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  105. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  106. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  107. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  108. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  109. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  110. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  111. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  112. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  113. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  114. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  115. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  116. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  117. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  118. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  119. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  120. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  121. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  122. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  123. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  124. end;
  125. UInt24 = TUInt24Rec;
  126. PUInt24 = ^UInt24;
  127. TUnicodeCodePoint = Cardinal;
  128. TUnicodeCodePointArray = array of TUnicodeCodePoint;
  129. TDecompositionArray = array of TUnicodeCodePointArray;
  130. TNumericValue = Double;
  131. TNumericValueArray = array of TNumericValue;
  132. TBlockItemRec = packed record
  133. RangeStart : TUnicodeCodePoint;
  134. RangeEnd : TUnicodeCodePoint;
  135. Name : string[120];
  136. CanonicalName : string[120];
  137. end;
  138. TBlocks = array of TBlockItemRec;
  139. PPropRec = ^TPropRec;
  140. { TPropRec }
  141. TPropRec = packed record
  142. private
  143. function GetCategory : TUnicodeCategory;inline;
  144. procedure SetCategory(AValue : TUnicodeCategory);
  145. function GetWhiteSpace : Boolean;inline;
  146. procedure SetWhiteSpace(AValue : Boolean);
  147. function GetHangulSyllable : Boolean;inline;
  148. procedure SetHangulSyllable(AValue : Boolean);
  149. public
  150. CategoryData : Byte;
  151. PropID : Word;
  152. CCC : Byte; // Canonical Combining Class
  153. NumericIndex : Byte;
  154. SimpleUpperCase : UInt24;
  155. SimpleLowerCase : UInt24;
  156. DecompositionID : SmallInt;
  157. public
  158. property Category : TUnicodeCategory read GetCategory write SetCategory;
  159. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  160. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  161. end;
  162. TPropRecArray = array of TPropRec;
  163. TDecompositionIndexRec = packed record
  164. StartPosition : Word;
  165. Length : Byte;
  166. end;
  167. TDecompositionBook = packed record
  168. Index : array of TDecompositionIndexRec;
  169. CodePoints : array of TUnicodeCodePoint;
  170. end;
  171. PDataLineRec = ^TDataLineRec;
  172. TDataLineRec = record
  173. PropID : Integer;
  174. case LineType : Byte of
  175. 0 : (CodePoint : TUnicodeCodePoint);
  176. 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
  177. end;
  178. TDataLineRecArray = array of TDataLineRec;
  179. TCodePointRec = record
  180. case LineType : Byte of
  181. 0 : (CodePoint : TUnicodeCodePoint);
  182. 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
  183. end;
  184. TCodePointRecArray = array of TCodePointRec;
  185. TPropListLineRec = packed record
  186. CodePoint : TCodePointRec;
  187. PropName : string[123];
  188. end;
  189. TPropListLineRecArray = array of TPropListLineRec;
  190. TUCA_WeightRec = packed record
  191. Weights : array[0..3] of Cardinal;
  192. Variable : Boolean;
  193. end;
  194. TUCA_WeightRecArray = array of TUCA_WeightRec;
  195. TUCA_LineContextItemRec = packed record
  196. public
  197. CodePoints : TUnicodeCodePointArray;
  198. Weights : TUCA_WeightRecArray;
  199. public
  200. procedure Clear();
  201. procedure Assign(ASource : TUCA_LineContextItemRec);
  202. function Clone() : TUCA_LineContextItemRec;
  203. end;
  204. PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
  205. TUCA_LineContextRec = packed record
  206. public
  207. Data : array of TUCA_LineContextItemRec;
  208. public
  209. procedure Clear();
  210. procedure Assign(ASource : TUCA_LineContextRec);
  211. function Clone() : TUCA_LineContextRec;
  212. end;
  213. PUCA_LineContextRec = ^TUCA_LineContextRec;
  214. { TUCA_LineRec }
  215. TUCA_LineRec = packed record
  216. public
  217. CodePoints : TUnicodeCodePointArray;
  218. Weights : TUCA_WeightRecArray;
  219. Context : TUCA_LineContextRec;
  220. //Variable : Boolean;
  221. Deleted : Boolean;
  222. Stored : Boolean;
  223. public
  224. procedure Clear();
  225. procedure Assign(ASource : TUCA_LineRec);
  226. function Clone() : TUCA_LineRec;
  227. function HasContext() : Boolean;
  228. end;
  229. PUCA_LineRec = ^TUCA_LineRec;
  230. TUCA_VariableKind = (
  231. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  232. ucaIgnoreSP
  233. );
  234. TUCA_DataBook = packed record
  235. Version : string;
  236. VariableWeight : TUCA_VariableKind;
  237. Backwards : array[0..3] of Boolean;
  238. Lines : array of TUCA_LineRec;
  239. end;
  240. PUCA_DataBook = ^TUCA_DataBook;
  241. TUCA_DataBookIndex = array of Integer;
  242. type
  243. TUCA_PropWeights = packed record
  244. Weights : array[0..2] of Word;
  245. //Variable : Byte;
  246. end;
  247. PUCA_PropWeights = ^TUCA_PropWeights;
  248. TUCA_PropItemContextRec = packed record
  249. CodePointCount : Byte;
  250. WeightCount : Byte;
  251. //CodePoints : UInt24;
  252. //Weights : TUCA_PropWeights;
  253. end;
  254. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  255. TUCA_PropItemContextTreeNodeRec = packed record
  256. Left : Word;
  257. Right : Word;
  258. Data : TUCA_PropItemContextRec;
  259. end;
  260. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  261. TUCA_PropItemContextTreeRec = packed record
  262. public
  263. Size : UInt24;
  264. public
  265. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  266. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  267. end;
  268. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  269. { TUCA_PropItemRec }
  270. TUCA_PropItemRec = packed record
  271. private
  272. const FLAG_VALID = 0;
  273. const FLAG_CODEPOINT = 1;
  274. const FLAG_CONTEXTUAL = 2;
  275. const FLAG_DELETION = 3;
  276. const FLAG_COMPRESS_WEIGHT_1 = 6;
  277. const FLAG_COMPRESS_WEIGHT_2 = 7;
  278. private
  279. function GetWeightSize : Word;inline;
  280. public
  281. WeightLength : Byte;
  282. ChildCount : Byte;
  283. Size : Word;
  284. Flags : Byte;
  285. public
  286. function HasCodePoint() : Boolean;inline;
  287. function GetCodePoint() : UInt24;//inline;
  288. property CodePoint : UInt24 read GetCodePoint;
  289. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  290. procedure GetWeightArray(ADest : PUCA_PropWeights);
  291. function GetSelfOnlySize() : Cardinal;inline;
  292. procedure SetContextual(AValue : Boolean);inline;
  293. function GetContextual() : Boolean;inline;
  294. property Contextual : Boolean read GetContextual write setContextual;
  295. function GetContext() : PUCA_PropItemContextTreeRec;
  296. procedure SetDeleted(AValue : Boolean);inline;
  297. function IsDeleted() : Boolean;inline;
  298. function IsValid() : Boolean;inline;
  299. function IsWeightCompress_1() : Boolean;inline;
  300. function IsWeightCompress_2() : Boolean;inline;
  301. end;
  302. PUCA_PropItemRec = ^TUCA_PropItemRec;
  303. TUCA_PropIndexItem = packed record
  304. CodePoint : Cardinal;
  305. Position : Integer;
  306. end;
  307. PUCA_PropIndexItem = ^TUCA_PropIndexItem;
  308. TUCA_PropBook = packed record
  309. ItemSize : Integer;
  310. Index : array of TUCA_PropIndexItem;
  311. Items : PUCA_PropItemRec; //Native Endian
  312. ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian
  313. VariableLowLimit : Word;
  314. VariableHighLimit : Word;
  315. end;
  316. PUCA_PropBook = ^TUCA_PropBook;
  317. TBmpFirstTable = array[0..255] of Byte;
  318. TBmpSecondTableItem = array[0..255] of Word;
  319. TBmpSecondTable = array of TBmpSecondTableItem;
  320. T3lvlBmp1Table = array[0..255] of Byte;
  321. T3lvlBmp2TableItem = array[0..15] of Word;
  322. T3lvlBmp2Table = array of T3lvlBmp2TableItem;
  323. T3lvlBmp3TableItem = array[0..15] of Word;
  324. T3lvlBmp3Table = array of T3lvlBmp3TableItem;
  325. TucaBmpFirstTable = array[0..255] of Byte;
  326. TucaBmpSecondTableItem = array[0..255] of Cardinal;
  327. TucaBmpSecondTable = array of TucaBmpSecondTableItem;
  328. PucaBmpFirstTable = ^TucaBmpFirstTable;
  329. PucaBmpSecondTable = ^TucaBmpSecondTable;
  330. const
  331. LOW_SURROGATE_BEGIN = Word($DC00);
  332. LOW_SURROGATE_END = Word($DFFF);
  333. LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
  334. HIGH_SURROGATE_BEGIN = Word($D800);
  335. HIGH_SURROGATE_END = Word($DBFF);
  336. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  337. type
  338. TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
  339. TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
  340. TOBmpSecondTable = array of TOBmpSecondTableItem;
  341. T3lvlOBmp1Table = array[0..1023] of Byte;
  342. T3lvlOBmp2TableItem = array[0..31] of Word;
  343. T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
  344. T3lvlOBmp3TableItem = array[0..31] of Word;
  345. T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
  346. TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
  347. TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
  348. TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
  349. PucaOBmpFirstTable = ^TucaOBmpFirstTable;
  350. PucaOBmpSecondTable = ^TucaOBmpSecondTable;
  351. type
  352. TEndianKind = (ekLittle, ekBig);
  353. const
  354. ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
  355. {$IFDEF ENDIAN_LITTLE}
  356. ENDIAN_NATIVE = ekLittle;
  357. ENDIAN_NON_NATIVE = ekBig;
  358. {$ENDIF ENDIAN_LITTLE}
  359. {$IFDEF ENDIAN_BIG}
  360. ENDIAN_NATIVE = ekBig;
  361. ENDIAN_NON_NATIVE = ekLittle;
  362. {$ENDIF ENDIAN_BIG}
  363. procedure GenerateLicenceText(ADest : TStream);
  364. function BoolToByte(AValue : Boolean): Byte;inline;
  365. function IsHangulSyllable(
  366. const ACodePoint : TUnicodeCodePoint;
  367. const AHangulList : TCodePointRecArray
  368. ) : Boolean;
  369. procedure ParseHangulSyllableTypes(
  370. ADataAStream : TMemoryStream;
  371. var ACodePointList : TCodePointRecArray
  372. );
  373. procedure ParseProps(
  374. ADataAStream : TMemoryStream;
  375. var APropList : TPropListLineRecArray
  376. );
  377. function FindCodePointsByProperty(
  378. const APropName : string;
  379. const APropList : TPropListLineRecArray
  380. ) : TCodePointRecArray;
  381. procedure ParseBlokcs(
  382. ADataAStream : TMemoryStream;
  383. var ABlocks : TBlocks
  384. );
  385. procedure ParseUCAFile(
  386. ADataAStream : TMemoryStream;
  387. var ABook : TUCA_DataBook
  388. );
  389. procedure MakeUCA_Props(
  390. ABook : PUCA_DataBook;
  391. out AProps : PUCA_PropBook
  392. );
  393. procedure FreeUcaBook(var ABook : PUCA_PropBook);
  394. procedure MakeUCA_BmpTables(
  395. var AFirstTable : TucaBmpFirstTable;
  396. var ASecondTable : TucaBmpSecondTable;
  397. const APropBook : PUCA_PropBook
  398. );
  399. procedure MakeUCA_OBmpTables(
  400. var AFirstTable : TucaOBmpFirstTable;
  401. var ASecondTable : TucaOBmpSecondTable;
  402. const APropBook : PUCA_PropBook
  403. );
  404. function GetPropPosition(
  405. const AHighS,
  406. ALowS : Word;
  407. const AFirstTable : PucaOBmpFirstTable;
  408. const ASecondTable : PucaOBmpSecondTable
  409. ): Integer;inline;overload;
  410. procedure GenerateUCA_Head(
  411. ADest : TStream;
  412. ABook : PUCA_DataBook;
  413. AProps : PUCA_PropBook
  414. );
  415. procedure GenerateUCA_BmpTables(
  416. AStream,
  417. ANativeEndianStream,
  418. ANonNativeEndianStream : TStream;
  419. var AFirstTable : TucaBmpFirstTable;
  420. var ASecondTable : TucaBmpSecondTable
  421. );
  422. procedure GenerateUCA_PropTable(
  423. ADest : TStream;
  424. const APropBook : PUCA_PropBook;
  425. const AEndian : TEndianKind
  426. );
  427. procedure GenerateUCA_OBmpTables(
  428. AStream,
  429. ANativeEndianStream,
  430. ANonNativeEndianStream : TStream;
  431. var AFirstTable : TucaOBmpFirstTable;
  432. var ASecondTable : TucaOBmpSecondTable
  433. );
  434. procedure Parse_UnicodeData(
  435. ADataAStream : TMemoryStream;
  436. var APropList : TPropRecArray;
  437. var ANumericTable : TNumericValueArray;
  438. var ADataLineList : TDataLineRecArray;
  439. var ADecomposition : TDecompositionArray;
  440. const AHangulList : TCodePointRecArray;
  441. const AWhiteSpaces : TCodePointRecArray
  442. );
  443. procedure MakeDecomposition(
  444. const ARawData : TDecompositionArray;
  445. var ABook : TDecompositionBook
  446. );
  447. procedure MakeBmpTables(
  448. var AFirstTable : TBmpFirstTable;
  449. var ASecondTable : TBmpSecondTable;
  450. const ADataLineList : TDataLineRecArray
  451. );
  452. procedure MakeBmpTables3Levels(
  453. var AFirstTable : T3lvlBmp1Table;
  454. var ASecondTable : T3lvlBmp2Table;
  455. var AThirdTable : T3lvlBmp3Table;
  456. const ADataLineList : TDataLineRecArray
  457. );
  458. procedure GenerateBmpTables(
  459. ADest : TStream;
  460. var AFirstTable : TBmpFirstTable;
  461. var ASecondTable : TBmpSecondTable
  462. );
  463. procedure Generate3lvlBmpTables(
  464. ADest : TStream;
  465. var AFirstTable : T3lvlBmp1Table;
  466. var ASecondTable : T3lvlBmp2Table;
  467. var AThirdTable : T3lvlBmp3Table
  468. );
  469. procedure GeneratePropTable(
  470. ADest : TStream;
  471. const APropList : TPropRecArray;
  472. const AEndian : TEndianKind
  473. );
  474. procedure GenerateNumericTable(
  475. ADest : TStream;
  476. const ANumList : TNumericValueArray;
  477. const ACompleteUnit : Boolean
  478. );
  479. procedure GenerateDecompositionBookTable(
  480. ADest : TStream;
  481. const ABook : TDecompositionBook;
  482. const AEndian : TEndianKind
  483. );
  484. procedure GenerateOutBmpTable(
  485. ADest : TStream;
  486. const AList : TDataLineRecArray
  487. );
  488. function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
  489. function EvaluateFloat(const AStr : string) : Double;
  490. function StrToCategory(const AStr : string) : TUnicodeCategory;
  491. function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
  492. function IsWhiteSpace(
  493. const ACodePoint : TUnicodeCodePoint;
  494. const AWhiteSpaces : TCodePointRecArray
  495. ) : Boolean;
  496. function GetPropID(
  497. ACodePoint : TUnicodeCodePoint;
  498. const ADataLineList : TDataLineRecArray
  499. ) : Cardinal;
  500. //--------------------
  501. procedure MakeOBmpTables(
  502. var AFirstTable : TOBmpFirstTable;
  503. var ASecondTable : TOBmpSecondTable;
  504. const ADataLineList : TDataLineRecArray
  505. );
  506. procedure MakeOBmpTables3Levels(
  507. var AFirstTable : T3lvlOBmp1Table;
  508. var ASecondTable : T3lvlOBmp2Table;
  509. var AThirdTable : T3lvlOBmp3Table;
  510. const ADataLineList : TDataLineRecArray
  511. );
  512. procedure GenerateOBmpTables(
  513. ADest : TStream;
  514. var AFirstTable : TOBmpFirstTable;
  515. var ASecondTable : TOBmpSecondTable
  516. );
  517. procedure Generate3lvlOBmpTables(
  518. ADest : TStream;
  519. var AFirstTable : T3lvlOBmp1Table;
  520. var ASecondTable : T3lvlOBmp2Table;
  521. var AThirdTable : T3lvlOBmp3Table
  522. );
  523. function GetProp(
  524. const AHighS,
  525. ALowS : Word;
  526. const AProps : TPropRecArray;
  527. var AFirstTable : TOBmpFirstTable;
  528. var ASecondTable : TOBmpSecondTable
  529. ): PPropRec; inline;overload;
  530. function GetProp(
  531. const AHighS,
  532. ALowS : Word;
  533. const AProps : TPropRecArray;
  534. var AFirstTable : T3lvlOBmp1Table;
  535. var ASecondTable : T3lvlOBmp2Table;
  536. var AThirdTable : T3lvlOBmp3Table
  537. ): PPropRec; inline;overload;
  538. procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
  539. function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
  540. //--------------------
  541. type
  542. TBitOrder = 0..7;
  543. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
  544. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
  545. function GenerateEndianIncludeFileName(
  546. const AStoreName : string;
  547. const AEndian : TEndianKind
  548. ): string;inline;
  549. procedure ReverseFromNativeEndian(
  550. const AData : PUCA_PropItemRec;
  551. const ADataLen : Cardinal;
  552. const ADest : PUCA_PropItemRec
  553. );
  554. procedure ReverseToNativeEndian(
  555. const AData : PUCA_PropItemRec;
  556. const ADataLen : Cardinal;
  557. const ADest : PUCA_PropItemRec
  558. );
  559. procedure CompareProps(
  560. const AProp1,
  561. AProp2 : PUCA_PropItemRec;
  562. const ADataLen : Integer
  563. );
  564. resourcestring
  565. SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
  566. implementation
  567. uses
  568. typinfo, Math, AVL_Tree,
  569. trie;
  570. type
  571. TCardinalRec = packed record
  572. {$ifdef FPC_LITTLE_ENDIAN}
  573. byte0, byte1, byte2, byte3 : Byte;
  574. {$else FPC_LITTLE_ENDIAN}
  575. byte3, byte2, byte1, byte0 : Byte;
  576. {$endif FPC_LITTLE_ENDIAN}
  577. end;
  578. TWordRec = packed record
  579. {$ifdef FPC_LITTLE_ENDIAN}
  580. byte0, byte1 : Byte;
  581. {$else FPC_LITTLE_ENDIAN}
  582. byte1, byte0 : Byte;
  583. {$endif FPC_LITTLE_ENDIAN}
  584. end;
  585. { TUInt24Rec }
  586. class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
  587. begin
  588. TCardinalRec(Result).byte0 := a.byte0;
  589. TCardinalRec(Result).byte1 := a.byte1;
  590. TCardinalRec(Result).byte2 := a.byte2;
  591. TCardinalRec(Result).byte3 := 0;
  592. end;
  593. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  594. begin
  595. TCardinalRec(Result).byte0 := a.byte0;
  596. TCardinalRec(Result).byte1 := a.byte1;
  597. TCardinalRec(Result).byte2 := a.byte2;
  598. TCardinalRec(Result).byte3 := 0;
  599. end;
  600. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  601. begin
  602. Result := Cardinal(a);
  603. end;
  604. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  605. begin
  606. {$IFOPT R+}
  607. if (a.byte2 > 0) then
  608. Error(reIntOverflow);
  609. {$ENDIF R+}
  610. TWordRec(Result).byte0 := a.byte0;
  611. TWordRec(Result).byte1 := a.byte1;
  612. end;
  613. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  614. begin
  615. {$IFOPT R+}
  616. if (a.byte1 > 0) or (a.byte2 > 0) then
  617. Error(reIntOverflow);
  618. {$ENDIF R+}
  619. Result := a.byte0;
  620. end;
  621. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  622. begin
  623. {$IFOPT R+}
  624. if (a > $FFFFFF) then
  625. Error(reIntOverflow);
  626. {$ENDIF R+}
  627. Result.byte0 := TCardinalRec(a).byte0;
  628. Result.byte1 := TCardinalRec(a).byte1;
  629. Result.byte2 := TCardinalRec(a).byte2;
  630. end;
  631. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  632. begin
  633. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  634. end;
  635. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  636. begin
  637. Result := (TCardinalRec(b).byte3 = 0) and
  638. (a.byte0 = TCardinalRec(b).byte0) and
  639. (a.byte1 = TCardinalRec(b).byte1) and
  640. (a.byte2 = TCardinalRec(b).byte2);
  641. end;
  642. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  643. begin
  644. Result := (b = a);
  645. end;
  646. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  647. begin
  648. Result := (LongInt(a) = b);
  649. end;
  650. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  651. begin
  652. Result := (b = a);
  653. end;
  654. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  655. begin
  656. Result := (a.byte2 = 0) and
  657. (a.byte0 = TWordRec(b).byte0) and
  658. (a.byte1 = TWordRec(b).byte1);
  659. end;
  660. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  661. begin
  662. Result := (b = a);
  663. end;
  664. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  665. begin
  666. Result := (a.byte2 = 0) and
  667. (a.byte1 = 0) and
  668. (a.byte0 = b);
  669. end;
  670. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  671. begin
  672. Result := (b = a);
  673. end;
  674. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  675. begin
  676. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  677. end;
  678. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  679. begin
  680. Result := (TCardinalRec(b).byte3 <> 0) or
  681. (a.byte0 <> TCardinalRec(b).byte0) or
  682. (a.byte1 <> TCardinalRec(b).byte1) or
  683. (a.byte2 <> TCardinalRec(b).byte2);
  684. end;
  685. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  686. begin
  687. Result := (b <> a);
  688. end;
  689. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  690. begin
  691. Result := (a.byte2 > b.byte2) or
  692. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  693. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  694. end;
  695. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  696. begin
  697. Result := Cardinal(a) > b;
  698. end;
  699. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  700. begin
  701. Result := a > Cardinal(b);
  702. end;
  703. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  704. begin
  705. Result := (a.byte2 > b.byte2) or
  706. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  707. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  708. end;
  709. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  710. begin
  711. Result := Cardinal(a) >= b;
  712. end;
  713. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  714. begin
  715. Result := a >= Cardinal(b);
  716. end;
  717. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  718. begin
  719. Result := (b > a);
  720. end;
  721. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  722. begin
  723. Result := Cardinal(a) < b;
  724. end;
  725. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  726. begin
  727. Result := a < Cardinal(b);
  728. end;
  729. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  730. begin
  731. Result := (b >= a);
  732. end;
  733. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  734. begin
  735. Result := Cardinal(a) <= b;
  736. end;
  737. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  738. begin
  739. Result := a <= Cardinal(b);
  740. end;
  741. function GenerateEndianIncludeFileName(
  742. const AStoreName : string;
  743. const AEndian : TEndianKind
  744. ): string;inline;
  745. begin
  746. Result := ExtractFilePath(AStoreName) +
  747. ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
  748. end;
  749. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
  750. begin
  751. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  752. end;
  753. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
  754. begin
  755. if AValue then
  756. AData := AData or (1 shl (ABit mod 8))
  757. else
  758. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  759. end;
  760. var
  761. FS : TFormatSettings;
  762. function EvaluateFloat(const AStr : string) : Double;
  763. var
  764. s, n, d : string;
  765. i : Integer;
  766. begin
  767. Result := 0;
  768. s := Trim(AStr);
  769. if (Length(s) > 0) then begin
  770. i := Pos('/',s);
  771. if (i < 1) then
  772. Result := StrToFloat(s,FS)
  773. else begin
  774. n := Copy(s,1,i-1);
  775. d := Copy(s,i+1,MaxInt);
  776. Result := StrToInt(n) / StrToInt(d);
  777. end;
  778. end;
  779. end;
  780. function StrToCategory(const AStr : string) : TUnicodeCategory;
  781. var
  782. s : string;
  783. begin
  784. s := UpperCase(Trim(AStr));
  785. if (s = 'LU') then
  786. Result := ucUppercaseLetter
  787. else if (s = 'LL') then
  788. Result := ucLowercaseLetter
  789. else if (s = 'LT') then
  790. Result := ucTitlecaseLetter
  791. else if (s = 'LM') then
  792. Result := ucModifierLetter
  793. else if (s = 'LO') then
  794. Result := ucOtherLetter
  795. else
  796. if (s = 'MN') then
  797. Result := ucNonSpacingMark
  798. else if (s = 'MC') then
  799. Result := ucCombiningMark
  800. else if (s = 'ME') then
  801. Result := ucEnclosingMark
  802. else
  803. if (s = 'ND') then
  804. Result := ucDecimalNumber
  805. else if (s = 'NL') then
  806. Result := ucLetterNumber
  807. else if (s = 'NO') then
  808. Result := ucOtherNumber
  809. else
  810. if (s = 'PC') then
  811. Result := ucConnectPunctuation
  812. else if (s = 'PD') then
  813. Result := ucDashPunctuation
  814. else if (s = 'PS') then
  815. Result := ucOpenPunctuation
  816. else if (s = 'PE') then
  817. Result := ucClosePunctuation
  818. else if (s = 'PI') then
  819. Result := ucInitialPunctuation
  820. else if (s = 'PF') then
  821. Result := ucFinalPunctuation
  822. else if (s = 'PO') then
  823. Result := ucOtherPunctuation
  824. else
  825. if (s = 'SM') then
  826. Result := ucMathSymbol
  827. else if (s = 'SC') then
  828. Result := ucCurrencySymbol
  829. else if (s = 'SK') then
  830. Result := ucModifierSymbol
  831. else if (s = 'SO') then
  832. Result := ucOtherSymbol
  833. else
  834. if (s = 'ZS') then
  835. Result := ucSpaceSeparator
  836. else if (s = 'ZL') then
  837. Result := ucLineSeparator
  838. else if (s = 'ZP') then
  839. Result := ucParagraphSeparator
  840. else
  841. if (s = 'CC') then
  842. Result := ucControl
  843. else if (s = 'CF') then
  844. Result := ucFormat
  845. else if (s = 'CS') then
  846. Result := ucSurrogate
  847. else if (s = 'CO') then
  848. Result := ucPrivateUse
  849. else
  850. Result := ucUnassigned;
  851. end;
  852. function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
  853. var
  854. s : string;
  855. begin
  856. s := Trim(ACP);
  857. Result := 0;
  858. if (Length(s) > 0) and (s <> '#') then
  859. Result := StrToInt('$' + s);
  860. end;
  861. {function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
  862. begin
  863. case ACodePoint of
  864. $0009..$000D : Result := True;// White_Space # Cc [5] <control-0009>..<control-000D>
  865. $0020 : Result := True;// White_Space # Zs SPACE
  866. $0085 : Result := True;// White_Space # Cc <control-0085>
  867. $00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE
  868. $1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK
  869. $180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR
  870. $2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE
  871. $2028 : Result := True;// White_Space # Zl LINE SEPARATOR
  872. $2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR
  873. $202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE
  874. $205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE
  875. $3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE
  876. else
  877. Result := False;
  878. end;
  879. end;}
  880. function IsWhiteSpace(
  881. const ACodePoint : TUnicodeCodePoint;
  882. const AWhiteSpaces : TCodePointRecArray
  883. ) : Boolean;
  884. var
  885. i : Integer;
  886. p : ^TCodePointRec;
  887. begin
  888. p := @AWhiteSpaces[Low(AWhiteSpaces)];
  889. for i := Low(AWhiteSpaces) to High(AWhiteSpaces) do begin
  890. if (p^.LineType = 0) then begin
  891. if (p^.CodePoint = ACodePoint) then
  892. exit(True);
  893. end else begin
  894. if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then
  895. exit(True);
  896. end;
  897. Inc(p);
  898. end;
  899. Result := False;
  900. end;
  901. function NormalizeBlockName(const AName : string) : string;
  902. var
  903. i, c, k : Integer;
  904. s : string;
  905. begin
  906. c := Length(AName);
  907. SetLength(Result,c);
  908. s := LowerCase(AName);
  909. k := 0;
  910. for i := 1 to c do begin
  911. if (s[1] in ['a'..'z','0'..'9','-']) then begin
  912. k := k + 1;
  913. Result[k] := s[i];
  914. end;
  915. end;
  916. SetLength(Result,k);
  917. end;
  918. procedure ParseBlokcs(
  919. ADataAStream : TMemoryStream;
  920. var ABlocks : TBlocks
  921. );
  922. const
  923. LINE_LENGTH = 1024;
  924. DATA_LENGTH = 25000;
  925. var
  926. p : PAnsiChar;
  927. actualDataLen : Integer;
  928. bufferLength, bufferPos, lineLength, linePos : Integer;
  929. line : ansistring;
  930. function NextLine() : Boolean;
  931. var
  932. locOldPos : Integer;
  933. locOldPointer : PAnsiChar;
  934. begin
  935. Result := False;
  936. locOldPointer := p;
  937. locOldPos := bufferPos;
  938. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  939. Inc(p);
  940. Inc(bufferPos);
  941. end;
  942. if (locOldPos = bufferPos) and (p^ = #10) then begin
  943. lineLength := 0;
  944. Inc(p);
  945. Inc(bufferPos);
  946. linePos := 1;
  947. Result := True;
  948. end else if (locOldPos < bufferPos) then begin
  949. lineLength := (bufferPos - locOldPos);
  950. Move(locOldPointer^,line[1],lineLength);
  951. if (p^ = #10) then begin
  952. Dec(lineLength);
  953. Inc(p);
  954. Inc(bufferPos);
  955. end;
  956. linePos := 1;
  957. Result := True;
  958. end;
  959. end;
  960. function NextToken() : ansistring;
  961. var
  962. k : Integer;
  963. begin
  964. k := linePos;
  965. if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
  966. Inc(linePos);
  967. Result := Copy(line,k,(linePos-k));
  968. exit;
  969. end;
  970. while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
  971. Inc(linePos);
  972. if (linePos > k) then begin
  973. if (line[linePos] in [';','#','.']) then
  974. Result := Copy(line,k,(linePos-k))
  975. else
  976. Result := Copy(line,k,(linePos-k+1));
  977. Result := Trim(Result);
  978. end else begin
  979. Result := '';
  980. end;
  981. end;
  982. procedure ParseLine();
  983. var
  984. locData : TBlockItemRec;
  985. s : ansistring;
  986. begin
  987. s := NextToken();
  988. if (s = '') or (s[1] = '#') then
  989. exit;
  990. locData.RangeStart := StrToInt('$'+s);
  991. s := NextToken();
  992. if (s <> '.') then
  993. raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
  994. s := NextToken();
  995. if (s <> '.') then
  996. raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
  997. s := NextToken();
  998. locData.RangeEnd := StrToInt('$'+s);
  999. s := NextToken();
  1000. if (s <> ';') then
  1001. raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
  1002. locData.Name := Trim(NextToken());
  1003. locData.CanonicalName := NormalizeBlockName(locData.Name);
  1004. if (Length(ABlocks) <= actualDataLen) then
  1005. SetLength(ABlocks,Length(ABlocks)*2);
  1006. ABlocks[actualDataLen] := locData;
  1007. Inc(actualDataLen);
  1008. end;
  1009. procedure Prepare();
  1010. begin
  1011. SetLength(ABlocks,DATA_LENGTH);
  1012. actualDataLen := 0;
  1013. bufferLength := ADataAStream.Size;
  1014. bufferPos := 0;
  1015. p := ADataAStream.Memory;
  1016. lineLength := 0;
  1017. SetLength(line,LINE_LENGTH);
  1018. end;
  1019. begin
  1020. Prepare();
  1021. while NextLine() do
  1022. ParseLine();
  1023. SetLength(ABlocks,actualDataLen);
  1024. end;
  1025. procedure ParseProps(
  1026. ADataAStream : TMemoryStream;
  1027. var APropList : TPropListLineRecArray
  1028. );
  1029. const
  1030. LINE_LENGTH = 1024;
  1031. DATA_LENGTH = 25000;
  1032. var
  1033. p : PAnsiChar;
  1034. actualDataLen : Integer;
  1035. bufferLength, bufferPos, lineLength, linePos : Integer;
  1036. line : ansistring;
  1037. function NextLine() : Boolean;
  1038. var
  1039. locOldPos : Integer;
  1040. locOldPointer : PAnsiChar;
  1041. begin
  1042. Result := False;
  1043. locOldPointer := p;
  1044. locOldPos := bufferPos;
  1045. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  1046. Inc(p);
  1047. Inc(bufferPos);
  1048. end;
  1049. if (locOldPos = bufferPos) and (p^ = #10) then begin
  1050. lineLength := 0;
  1051. Inc(p);
  1052. Inc(bufferPos);
  1053. linePos := 1;
  1054. Result := True;
  1055. end else if (locOldPos < bufferPos) then begin
  1056. lineLength := (bufferPos - locOldPos);
  1057. Move(locOldPointer^,line[1],lineLength);
  1058. if (p^ = #10) then begin
  1059. Dec(lineLength);
  1060. Inc(p);
  1061. Inc(bufferPos);
  1062. end;
  1063. linePos := 1;
  1064. Result := True;
  1065. end;
  1066. end;
  1067. function NextToken() : ansistring;
  1068. var
  1069. k : Integer;
  1070. begin
  1071. k := linePos;
  1072. if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
  1073. Inc(linePos);
  1074. Result := Copy(line,k,(linePos-k));
  1075. exit;
  1076. end;
  1077. while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
  1078. Inc(linePos);
  1079. if (linePos > k) then begin
  1080. if (line[linePos] in [';','#','.']) then
  1081. Result := Copy(line,k,(linePos-k))
  1082. else
  1083. Result := Copy(line,k,(linePos-k+1));
  1084. Result := Trim(Result);
  1085. end else begin
  1086. Result := '';
  1087. end;
  1088. end;
  1089. procedure ParseLine();
  1090. var
  1091. locCP : Cardinal;
  1092. locData : TPropListLineRec;
  1093. s : ansistring;
  1094. begin
  1095. s := NextToken();
  1096. if (s = '') or (s[1] = '#') then
  1097. exit;
  1098. locCP := StrToInt('$'+s);
  1099. s := NextToken();
  1100. if (s = ';') then begin
  1101. locData.CodePoint.LineType := 0;
  1102. locData.CodePoint.CodePoint := locCP;
  1103. end else begin
  1104. if (s = '') or (s <> '.') or (NextToken() <> '.') then
  1105. raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
  1106. locData.CodePoint.LineType := 1;
  1107. locData.CodePoint.StartCodePoint := locCP;
  1108. locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
  1109. s := NextToken();
  1110. if (s <> ';') then
  1111. raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
  1112. end;
  1113. locData.PropName := Trim(NextToken());
  1114. if (Length(APropList) <= actualDataLen) then
  1115. SetLength(APropList,Length(APropList)*2);
  1116. APropList[actualDataLen] := locData;
  1117. Inc(actualDataLen);
  1118. end;
  1119. procedure Prepare();
  1120. begin
  1121. SetLength(APropList,DATA_LENGTH);
  1122. actualDataLen := 0;
  1123. bufferLength := ADataAStream.Size;
  1124. bufferPos := 0;
  1125. p := ADataAStream.Memory;
  1126. lineLength := 0;
  1127. SetLength(line,LINE_LENGTH);
  1128. end;
  1129. begin
  1130. Prepare();
  1131. while NextLine() do
  1132. ParseLine();
  1133. SetLength(APropList,actualDataLen);
  1134. end;
  1135. function FindCodePointsByProperty(
  1136. const APropName : string;
  1137. const APropList : TPropListLineRecArray
  1138. ) : TCodePointRecArray;
  1139. var
  1140. r : TCodePointRecArray;
  1141. i, k : Integer;
  1142. s : string;
  1143. begin
  1144. k := 0;
  1145. r := nil;
  1146. s := LowerCase(Trim(APropName));
  1147. for i := Low(APropList) to High(APropList) do begin
  1148. if (LowerCase(APropList[i].PropName) = s) then begin
  1149. if (k >= Length(r)) then begin
  1150. if (k = 0) then
  1151. SetLength(r,24)
  1152. else
  1153. SetLength(r,(2*Length(r)));
  1154. end;
  1155. r[k] := APropList[i].CodePoint;
  1156. Inc(k);
  1157. end;
  1158. end;
  1159. SetLength(r,k);
  1160. Result := r;
  1161. end;
  1162. procedure ParseHangulSyllableTypes(
  1163. ADataAStream : TMemoryStream;
  1164. var ACodePointList : TCodePointRecArray
  1165. );
  1166. const
  1167. LINE_LENGTH = 1024;
  1168. DATA_LENGTH = 25000;
  1169. var
  1170. p : PAnsiChar;
  1171. actualDataLen : Integer;
  1172. bufferLength, bufferPos, lineLength, linePos : Integer;
  1173. line : ansistring;
  1174. function NextLine() : Boolean;
  1175. var
  1176. locOldPos : Integer;
  1177. locOldPointer : PAnsiChar;
  1178. begin
  1179. Result := False;
  1180. locOldPointer := p;
  1181. locOldPos := bufferPos;
  1182. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  1183. Inc(p);
  1184. Inc(bufferPos);
  1185. end;
  1186. if (locOldPos = bufferPos) and (p^ = #10) then begin
  1187. lineLength := 0;
  1188. Inc(p);
  1189. Inc(bufferPos);
  1190. linePos := 1;
  1191. Result := True;
  1192. end else if (locOldPos < bufferPos) then begin
  1193. lineLength := (bufferPos - locOldPos);
  1194. Move(locOldPointer^,line[1],lineLength);
  1195. if (p^ = #10) then begin
  1196. Dec(lineLength);
  1197. Inc(p);
  1198. Inc(bufferPos);
  1199. end;
  1200. linePos := 1;
  1201. Result := True;
  1202. end;
  1203. end;
  1204. function NextToken() : ansistring;
  1205. var
  1206. k : Integer;
  1207. begin
  1208. k := linePos;
  1209. if (linePos < lineLength) and (line[linePos] = '.') then begin
  1210. Inc(linePos);
  1211. while (linePos < lineLength) and (line[linePos] = '.') do begin
  1212. Inc(linePos);
  1213. end;
  1214. Result := Copy(line,k,(linePos-k));
  1215. exit;
  1216. end;
  1217. while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
  1218. Inc(linePos);
  1219. if (linePos > k) then begin
  1220. if (line[linePos] in [';','#','.']) then
  1221. Result := Copy(line,k,(linePos-k))
  1222. else
  1223. Result := Copy(line,k,(linePos-k+1));
  1224. Result := Trim(Result);
  1225. end else begin
  1226. Result := '';
  1227. end;
  1228. //Inc(linePos);
  1229. end;
  1230. procedure ParseLine();
  1231. var
  1232. locData : TCodePointRec;
  1233. s : ansistring;
  1234. begin
  1235. s := NextToken();
  1236. if (s = '') or (s[1] = '#') then
  1237. exit;
  1238. locData.CodePoint := StrToInt('$'+s);
  1239. s := NextToken();
  1240. if (s = '') or (s[1] in [';','#']) then begin
  1241. locData.LineType := 0;
  1242. end else begin
  1243. if (s <> '..') then
  1244. raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
  1245. locData.StartCodePoint := locData.CodePoint;
  1246. locData.EndCodePoint := StrToInt('$'+NextToken());
  1247. locData.LineType := 1;
  1248. end;
  1249. if (Length(ACodePointList) <= actualDataLen) then
  1250. SetLength(ACodePointList,Length(ACodePointList)*2);
  1251. ACodePointList[actualDataLen] := locData;
  1252. Inc(actualDataLen);
  1253. end;
  1254. procedure Prepare();
  1255. begin
  1256. SetLength(ACodePointList,DATA_LENGTH);
  1257. actualDataLen := 0;
  1258. bufferLength := ADataAStream.Size;
  1259. bufferPos := 0;
  1260. p := ADataAStream.Memory;
  1261. lineLength := 0;
  1262. SetLength(line,LINE_LENGTH);
  1263. end;
  1264. begin
  1265. Prepare();
  1266. while NextLine() do
  1267. ParseLine();
  1268. SetLength(ACodePointList,actualDataLen);
  1269. end;
  1270. function IsHangulSyllable(
  1271. const ACodePoint : TUnicodeCodePoint;
  1272. const AHangulList : TCodePointRecArray
  1273. ) : Boolean;
  1274. var
  1275. i : Integer;
  1276. p : ^TCodePointRec;
  1277. begin
  1278. Result := False;
  1279. p := @AHangulList[Low(AHangulList)];
  1280. for i := Low(AHangulList) to High(AHangulList) do begin
  1281. if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
  1282. ( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
  1283. then begin
  1284. Result := True;
  1285. Break;
  1286. end;
  1287. Inc(p);
  1288. end;
  1289. end;
  1290. function IndexOf(
  1291. const AProp : TPropRec;
  1292. const APropList : TPropRecArray;
  1293. const AActualLen : Integer
  1294. ) : Integer;overload;
  1295. var
  1296. i : Integer;
  1297. p : PPropRec;
  1298. begin
  1299. Result := -1;
  1300. if (AActualLen > 0) then begin
  1301. p := @APropList[0];
  1302. for i := 0 to AActualLen - 1 do begin
  1303. if (AProp.Category = p^.Category) and
  1304. (AProp.CCC = p^.CCC) and
  1305. (AProp.NumericIndex = p^.NumericIndex) and
  1306. (AProp.SimpleUpperCase = p^.SimpleUpperCase) and
  1307. (AProp.SimpleLowerCase = p^.SimpleLowerCase) and
  1308. (AProp.WhiteSpace = p^.WhiteSpace) and
  1309. //
  1310. (AProp.DecompositionID = p^.DecompositionID) and
  1311. (* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
  1312. ( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
  1313. *)
  1314. (AProp.HangulSyllable = p^.HangulSyllable)
  1315. then begin
  1316. Result := i;
  1317. Break;
  1318. end;
  1319. Inc(p);
  1320. end;
  1321. end;
  1322. end;
  1323. function IndexOf(
  1324. const AItem : TUnicodeCodePointArray;
  1325. const AList : TDecompositionArray
  1326. ) : Integer;overload;
  1327. var
  1328. p : TUnicodeCodePointArray;
  1329. i : Integer;
  1330. begin
  1331. Result := -1;
  1332. if (Length(AList) = 0) then
  1333. exit;
  1334. for i := Low(AList) to High(AList) do begin
  1335. p := AList[i];
  1336. if (Length(p) = Length(AItem)) then begin
  1337. if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
  1338. exit(i);
  1339. end;
  1340. end;
  1341. Result := -1;
  1342. end;
  1343. function IndexOf(
  1344. const AItem : TNumericValue;
  1345. const AList : TNumericValueArray;
  1346. const AActualLen : Integer
  1347. ) : Integer;overload;
  1348. var
  1349. p : ^TNumericValue;
  1350. i : Integer;
  1351. begin
  1352. Result := -1;
  1353. if (AActualLen = 0) then
  1354. exit;
  1355. p := @AList[Low(AList)];
  1356. for i := Low(AList) to AActualLen - 1 do begin
  1357. if (AItem = p^) then
  1358. exit(i);
  1359. Inc(p);
  1360. end;
  1361. Result := -1;
  1362. end;
  1363. procedure Parse_UnicodeData(
  1364. ADataAStream : TMemoryStream;
  1365. var APropList : TPropRecArray;
  1366. var ANumericTable : TNumericValueArray;
  1367. var ADataLineList : TDataLineRecArray;
  1368. var ADecomposition : TDecompositionArray;
  1369. const AHangulList : TCodePointRecArray;
  1370. const AWhiteSpaces : TCodePointRecArray
  1371. );
  1372. const
  1373. LINE_LENGTH = 1024;
  1374. PROP_LENGTH = 5000;
  1375. DATA_LENGTH = 25000;
  1376. var
  1377. p : PAnsiChar;
  1378. bufferLength, bufferPos : Integer;
  1379. actualPropLen, actualDataLen, actualNumLen : Integer;
  1380. line : ansistring;
  1381. lineLength, linePos : Integer;
  1382. function NextLine() : Boolean;
  1383. var
  1384. locOldPos : Integer;
  1385. locOldPointer : PAnsiChar;
  1386. begin
  1387. Result := False;
  1388. locOldPointer := p;
  1389. locOldPos := bufferPos;
  1390. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  1391. Inc(p);
  1392. Inc(bufferPos);
  1393. end;
  1394. if (locOldPos < bufferPos) then begin
  1395. lineLength := (bufferPos - locOldPos);
  1396. Move(locOldPointer^,line[1],lineLength);
  1397. if (p^ = #10) then begin
  1398. Dec(lineLength);
  1399. Inc(p);
  1400. Inc(bufferPos);
  1401. end;
  1402. if (lineLength > 7) then begin
  1403. linePos := 1;
  1404. Result := True;
  1405. end;
  1406. end;
  1407. end;
  1408. function NextToken() : ansistring;
  1409. var
  1410. k : Integer;
  1411. begin
  1412. k := linePos;
  1413. while (linePos < lineLength) and not(line[linePos] in [';','#']) do
  1414. Inc(linePos);
  1415. if (linePos > k) then begin
  1416. if (line[linePos] in [';','#']) then
  1417. Result := Copy(line,k,(linePos-k))
  1418. else
  1419. Result := Copy(line,k,(linePos-k+1));
  1420. Result := Trim(Result);
  1421. end else begin
  1422. Result := '';
  1423. end;
  1424. Inc(linePos);
  1425. end;
  1426. function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
  1427. var
  1428. locStr, ks : ansistring;
  1429. k0,k : Integer;
  1430. begin
  1431. SetLength(Result,0);
  1432. locStr := UpperCase(Trim(AStr));
  1433. if (locStr = '') or (locStr[1] = '<') then
  1434. exit;
  1435. k0 := 1;
  1436. k := 1;
  1437. while (k <= Length(locStr)) do begin
  1438. while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
  1439. inc(k);
  1440. ks := Trim(Copy(locStr,k0,k-k0));
  1441. SetLength(Result,Length(Result)+1);
  1442. Result[Length(Result)-1] := StringToCodePoint(ks);
  1443. Inc(k);
  1444. k0 := k;
  1445. end;
  1446. end;
  1447. procedure ParseLine();
  1448. var
  1449. locCP : TUnicodeCodePoint;
  1450. locProp : TPropRec;
  1451. locData : TDataLineRec;
  1452. s : ansistring;
  1453. locRangeStart, locRangeEnd : Boolean;
  1454. k : Integer;
  1455. locDecompItem : TUnicodeCodePointArray;
  1456. numVal : TNumericValue;
  1457. begin
  1458. FillChar(locData,SizeOf(locData),#0);
  1459. FillChar(locProp,SizeOf(locProp),#0);
  1460. locCP := StrToInt('$'+NextToken());
  1461. s := NextToken();
  1462. locRangeStart := AnsiEndsText(', First>',s);
  1463. if locRangeStart then
  1464. locRangeEnd := False
  1465. else
  1466. locRangeEnd := AnsiEndsText(', Last>',s);
  1467. if locRangeStart then begin
  1468. locData.LineType := 1;
  1469. locData.StartCodePoint := locCP;
  1470. end else if locRangeEnd then begin
  1471. ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
  1472. exit;
  1473. //locData.EndCodePoint := locCP;
  1474. end else begin
  1475. locData.LineType := 0;
  1476. locData.CodePoint := locCP;
  1477. end;
  1478. locProp.Category := StrToCategory(NextToken());
  1479. locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
  1480. NextToken();//Bidi_Class
  1481. s := NextToken();//Decomposition_Type
  1482. locDecompItem := ParseCanonicalDecomposition(s);
  1483. if (Length(locDecompItem) = 0) then
  1484. locProp.DecompositionID := -1
  1485. else begin
  1486. locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
  1487. if (locProp.DecompositionID = -1) then begin
  1488. k := Length(ADecomposition);
  1489. locProp.DecompositionID := k;
  1490. SetLength(ADecomposition,k+1);
  1491. ADecomposition[k] := locDecompItem;
  1492. end;
  1493. end;
  1494. numVal := EvaluateFloat(NextToken());
  1495. if (numVal <> Double(0.0)) then begin
  1496. NextToken();
  1497. NextToken();
  1498. end else begin
  1499. s := NextToken();
  1500. if (s <> '') then
  1501. numVal := EvaluateFloat(s);
  1502. s := NextToken();
  1503. if (numVal = Double(0.0)) then
  1504. numVal := EvaluateFloat(s);
  1505. end;
  1506. k := IndexOf(numVal,ANumericTable,actualNumLen);
  1507. if (k = -1) then begin
  1508. if (actualNumLen >= Length(ANumericTable)) then
  1509. SetLength(ANumericTable,(actualNumLen*2));
  1510. ANumericTable[actualNumLen] := numVal;
  1511. k := actualNumLen;
  1512. Inc(actualNumLen);
  1513. end;
  1514. locProp.NumericIndex := k;
  1515. NextToken();//Bidi_Mirroed
  1516. NextToken();//Unicode_l_Name
  1517. NextToken();//ISO_Comment
  1518. locProp.SimpleUpperCase := StringToCodePoint(NextToken());
  1519. locProp.SimpleLowerCase := StringToCodePoint(NextToken());
  1520. NextToken();//Simple_Title_Case_Mapping
  1521. locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
  1522. locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
  1523. k := IndexOf(locProp,APropList,actualPropLen);
  1524. if (k = -1) then begin
  1525. k := actualPropLen;
  1526. locProp.PropID := k{ + 1};
  1527. APropList[k] := locProp;
  1528. Inc(actualPropLen);
  1529. end;
  1530. locData.PropID := k;
  1531. ADataLineList[actualDataLen] := locData;
  1532. Inc(actualDataLen);
  1533. end;
  1534. procedure Prepare();
  1535. var
  1536. r : TPropRec;
  1537. begin
  1538. SetLength(APropList,PROP_LENGTH);
  1539. actualPropLen := 0;
  1540. SetLength(ADataLineList,DATA_LENGTH);
  1541. actualDataLen := 0;
  1542. bufferLength := ADataAStream.Size;
  1543. bufferPos := 0;
  1544. p := ADataAStream.Memory;
  1545. lineLength := 0;
  1546. SetLength(line,LINE_LENGTH);
  1547. SetLength(ANumericTable,500);
  1548. actualNumLen := 0;
  1549. FillChar(r,SizeOf(r),#0);
  1550. r.PropID := 0;
  1551. r.Category := ucUnassigned;
  1552. r.DecompositionID := -1;
  1553. r.NumericIndex := 0;
  1554. APropList[0] := r;
  1555. Inc(actualPropLen);
  1556. ANumericTable[0] := 0;
  1557. Inc(actualNumLen);
  1558. end;
  1559. begin
  1560. Prepare();
  1561. while NextLine() do
  1562. ParseLine();
  1563. SetLength(APropList,actualPropLen);
  1564. SetLength(ADataLineList,actualDataLen);
  1565. SetLength(ANumericTable,actualNumLen);
  1566. end;
  1567. function GetPropID(
  1568. ACodePoint : TUnicodeCodePoint;
  1569. const ADataLineList : TDataLineRecArray
  1570. ) : Cardinal;
  1571. var
  1572. i : Integer;
  1573. p : PDataLineRec;
  1574. begin
  1575. Result := 0;
  1576. p := @ADataLineList[Low(ADataLineList)];
  1577. for i := Low(ADataLineList) to High(ADataLineList) do begin
  1578. if (p^.LineType = 0) then begin
  1579. if (p^.CodePoint = ACodePoint) then begin
  1580. Result := p^.PropID;
  1581. Break;
  1582. end;
  1583. end else begin
  1584. if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
  1585. Result := p^.PropID;
  1586. Break;
  1587. end;
  1588. end;
  1589. Inc(p);
  1590. end;
  1591. end;
  1592. procedure MakeDecomposition(
  1593. const ARawData : TDecompositionArray;
  1594. var ABook : TDecompositionBook
  1595. );
  1596. var
  1597. i, c, locPos : Integer;
  1598. locItem : TUnicodeCodePointArray;
  1599. begin
  1600. c := 0;
  1601. for i := Low(ARawData) to High(ARawData) do
  1602. c := c + Length(ARawData[i]);
  1603. SetLength(ABook.CodePoints,c);
  1604. SetLength(ABook.Index,Length(ARawData));
  1605. locPos := 0;
  1606. for i := Low(ARawData) to High(ARawData) do begin
  1607. locItem := ARawData[i];
  1608. ABook.Index[i].StartPosition := locPos;
  1609. ABook.Index[i].Length := Length(locItem);
  1610. Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
  1611. locPos := locPos + Length(locItem);
  1612. end;
  1613. end;
  1614. type
  1615. PBmpSecondTableItem = ^TBmpSecondTableItem;
  1616. function IndexOf(
  1617. const AItem : PBmpSecondTableItem;
  1618. const ATable : TBmpSecondTable;
  1619. const ATableActualLength : Integer
  1620. ) : Integer;overload;
  1621. var
  1622. i : Integer;
  1623. p : PBmpSecondTableItem;
  1624. begin
  1625. Result := -1;
  1626. if (ATableActualLength > 0) then begin
  1627. p := @ATable[0];
  1628. for i := 0 to ATableActualLength - 1 do begin
  1629. if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
  1630. Result := i;
  1631. Break;
  1632. end;
  1633. Inc(p);
  1634. end;
  1635. end;
  1636. end;
  1637. procedure MakeBmpTables(
  1638. var AFirstTable : TBmpFirstTable;
  1639. var ASecondTable : TBmpSecondTable;
  1640. const ADataLineList : TDataLineRecArray
  1641. );
  1642. var
  1643. locLowByte, locHighByte : Byte;
  1644. locTableItem : TBmpSecondTableItem;
  1645. locCP : TUnicodeCodePoint;
  1646. i, locSecondActualLen : Integer;
  1647. begin
  1648. SetLength(ASecondTable,120);
  1649. locSecondActualLen := 0;
  1650. for locHighByte := 0 to 255 do begin
  1651. FillChar(locTableItem,SizeOf(locTableItem),#0);
  1652. for locLowByte := 0 to 255 do begin
  1653. locCP := (locHighByte * 256) + locLowByte;
  1654. locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
  1655. end;
  1656. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  1657. if (i = -1) then begin
  1658. if (locSecondActualLen = Length(ASecondTable)) then
  1659. SetLength(ASecondTable,locSecondActualLen + 50);
  1660. i := locSecondActualLen;
  1661. ASecondTable[i] := locTableItem;
  1662. Inc(locSecondActualLen);
  1663. end;
  1664. AFirstTable[locHighByte] := i;
  1665. end;
  1666. SetLength(ASecondTable,locSecondActualLen);
  1667. end;
  1668. type
  1669. P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
  1670. function IndexOf(
  1671. const AItem : P3lvlBmp3TableItem;
  1672. const ATable : T3lvlBmp3Table;
  1673. const ATableActualLength : Integer
  1674. ) : Integer;overload;
  1675. var
  1676. i : Integer;
  1677. p : P3lvlBmp3TableItem;
  1678. begin
  1679. Result := -1;
  1680. if (ATableActualLength > 0) then begin
  1681. p := @ATable[0];
  1682. for i := 0 to ATableActualLength - 1 do begin
  1683. if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
  1684. Result := i;
  1685. Break;
  1686. end;
  1687. Inc(p);
  1688. end;
  1689. end;
  1690. end;
  1691. type
  1692. P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
  1693. function IndexOf(
  1694. const AItem : P3lvlBmp2TableItem;
  1695. const ATable : T3lvlBmp2Table
  1696. ) : Integer;overload;
  1697. var
  1698. i : Integer;
  1699. p : P3lvlBmp2TableItem;
  1700. begin
  1701. Result := -1;
  1702. if (Length(ATable) > 0) then begin
  1703. p := @ATable[0];
  1704. for i := 0 to Length(ATable) - 1 do begin
  1705. if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
  1706. Result := i;
  1707. Break;
  1708. end;
  1709. Inc(p);
  1710. end;
  1711. end;
  1712. end;
  1713. procedure MakeBmpTables3Levels(
  1714. var AFirstTable : T3lvlBmp1Table;
  1715. var ASecondTable : T3lvlBmp2Table;
  1716. var AThirdTable : T3lvlBmp3Table;
  1717. const ADataLineList : TDataLineRecArray
  1718. );
  1719. var
  1720. locLowByte0, locLowByte1, locHighByte : Byte;
  1721. locTableItem2 : T3lvlBmp2TableItem;
  1722. locTableItem3 : T3lvlBmp3TableItem;
  1723. locCP : TUnicodeCodePoint;
  1724. i, locThirdActualLen : Integer;
  1725. begin
  1726. SetLength(AThirdTable,120);
  1727. locThirdActualLen := 0;
  1728. for locHighByte := 0 to 255 do begin
  1729. FillChar(locTableItem2,SizeOf(locTableItem2),#0);
  1730. for locLowByte0 := 0 to 15 do begin
  1731. FillChar(locTableItem3,SizeOf(locTableItem3),#0);
  1732. for locLowByte1 := 0 to 15 do begin
  1733. locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
  1734. locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
  1735. end;
  1736. i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
  1737. if (i = -1) then begin
  1738. if (locThirdActualLen = Length(AThirdTable)) then
  1739. SetLength(AThirdTable,locThirdActualLen + 50);
  1740. i := locThirdActualLen;
  1741. AThirdTable[i] := locTableItem3;
  1742. Inc(locThirdActualLen);
  1743. end;
  1744. locTableItem2[locLowByte0] := i;
  1745. end;
  1746. i := IndexOf(@locTableItem2,ASecondTable);
  1747. if (i = -1) then begin
  1748. i := Length(ASecondTable);
  1749. SetLength(ASecondTable,(i + 1));
  1750. ASecondTable[i] := locTableItem2;
  1751. end;
  1752. AFirstTable[locHighByte] := i;
  1753. end;
  1754. SetLength(AThirdTable,locThirdActualLen);
  1755. end;
  1756. procedure GenerateLicenceText(ADest : TStream);
  1757. var
  1758. s : ansistring;
  1759. begin
  1760. s := SLicenseText + sLineBreak + sLineBreak;
  1761. ADest.Write(s[1],Length(s));
  1762. end;
  1763. procedure GenerateBmpTables(
  1764. ADest : TStream;
  1765. var AFirstTable : TBmpFirstTable;
  1766. var ASecondTable : TBmpSecondTable
  1767. );
  1768. procedure AddLine(const ALine : ansistring);
  1769. var
  1770. buffer : ansistring;
  1771. begin
  1772. buffer := ALine + sLineBreak;
  1773. ADest.Write(buffer[1],Length(buffer));
  1774. end;
  1775. var
  1776. i, j, c : Integer;
  1777. locLine : string;
  1778. begin
  1779. AddLine('const');
  1780. AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
  1781. locLine := '';
  1782. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  1783. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  1784. if (((i+1) mod 16) = 0) then begin
  1785. locLine := ' ' + locLine;
  1786. AddLine(locLine);
  1787. locLine := '';
  1788. end;
  1789. end;
  1790. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  1791. locLine := ' ' + locLine;
  1792. AddLine(locLine);
  1793. AddLine(' );' + sLineBreak);
  1794. AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
  1795. c := High(ASecondTable);
  1796. for i := Low(ASecondTable) to c do begin
  1797. locLine := '';
  1798. for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
  1799. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  1800. if (((j+1) mod 16) = 0) then begin
  1801. if (i = c) and (j = 255) then
  1802. Delete(locLine,Length(locLine),1);
  1803. locLine := ' ' + locLine;
  1804. AddLine(locLine);
  1805. locLine := '';
  1806. end;
  1807. end;
  1808. end;
  1809. AddLine(' );' + sLineBreak);
  1810. end;
  1811. //----------------------------------
  1812. procedure Generate3lvlBmpTables(
  1813. ADest : TStream;
  1814. var AFirstTable : T3lvlBmp1Table;
  1815. var ASecondTable : T3lvlBmp2Table;
  1816. var AThirdTable : T3lvlBmp3Table
  1817. );
  1818. procedure AddLine(const ALine : ansistring);
  1819. var
  1820. buffer : ansistring;
  1821. begin
  1822. buffer := ALine + sLineBreak;
  1823. ADest.Write(buffer[1],Length(buffer));
  1824. end;
  1825. var
  1826. i, j, c : Integer;
  1827. locLine : string;
  1828. begin
  1829. AddLine('const');
  1830. AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
  1831. locLine := '';
  1832. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  1833. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  1834. if (((i+1) mod 16) = 0) then begin
  1835. locLine := ' ' + locLine;
  1836. AddLine(locLine);
  1837. locLine := '';
  1838. end;
  1839. end;
  1840. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  1841. locLine := ' ' + locLine;
  1842. AddLine(locLine);
  1843. AddLine(' );' + sLineBreak);
  1844. AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
  1845. c := High(ASecondTable);
  1846. for i := Low(ASecondTable) to c do begin
  1847. locLine := '(';
  1848. for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
  1849. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  1850. Delete(locLine,Length(locLine),1);
  1851. locLine := ' ' + locLine + ')';
  1852. if (i < c) then
  1853. locLine := locLine + ',';
  1854. AddLine(locLine);
  1855. end;
  1856. AddLine(' );' + sLineBreak);
  1857. AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
  1858. c := High(AThirdTable);
  1859. for i := Low(AThirdTable) to c do begin
  1860. locLine := '(';
  1861. for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
  1862. locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
  1863. Delete(locLine,Length(locLine),1);
  1864. locLine := ' ' + locLine + ')';
  1865. if (i < c) then
  1866. locLine := locLine + ',';
  1867. AddLine(locLine);
  1868. end;
  1869. AddLine(' );' + sLineBreak);
  1870. end;
  1871. function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
  1872. begin
  1873. if (AEndian = ekBig) then
  1874. Result := Format(
  1875. '(byte2 : $%s; byte1 : $%s; byte0 : $%s;)',
  1876. [ IntToHex(AValue.byte2,2), IntToHex(AValue.byte1,2),
  1877. IntToHex(AValue.byte0,2)
  1878. ]
  1879. )
  1880. else
  1881. Result := Format(
  1882. '(byte0 : $%s; byte1 : $%s; byte2 : $%s;)',
  1883. [ IntToHex(AValue.byte0,2), IntToHex(AValue.byte1,2),
  1884. IntToHex(AValue.byte2,2)
  1885. ]
  1886. );
  1887. end;
  1888. procedure GeneratePropTable(
  1889. ADest : TStream;
  1890. const APropList : TPropRecArray;
  1891. const AEndian : TEndianKind
  1892. );
  1893. procedure AddLine(const ALine : ansistring);
  1894. var
  1895. buffer : ansistring;
  1896. begin
  1897. buffer := ALine + sLineBreak;
  1898. ADest.Write(buffer[1],Length(buffer));
  1899. end;
  1900. var
  1901. i : Integer;
  1902. locLine : string;
  1903. p : PPropRec;
  1904. begin
  1905. AddLine('');
  1906. AddLine('const');
  1907. AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
  1908. AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
  1909. p := @APropList[0];
  1910. for i := Low(APropList) to High(APropList) - 1 do begin
  1911. locLine := ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
  1912. ' CCC : ' + IntToStr(p^.CCC) + ';' +
  1913. ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
  1914. ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
  1915. ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
  1916. ' DecompositionID : ' + IntToStr(p^.DecompositionID) + '),';
  1917. AddLine(locLine);
  1918. Inc(p);
  1919. end;
  1920. locLine := //' (Category : TUnicodeCategory.' + GetEnumName(pti,Ord(p^.Category)) + ';' +
  1921. ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
  1922. ' CCC : ' + IntToStr(p^.CCC) + ';' +
  1923. ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
  1924. ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
  1925. ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
  1926. ' DecompositionID : ' + IntToStr(p^.DecompositionID) + ')';
  1927. AddLine(locLine);
  1928. AddLine(' );' + sLineBreak);
  1929. end;
  1930. procedure GenerateNumericTable(
  1931. ADest : TStream;
  1932. const ANumList : TNumericValueArray;
  1933. const ACompleteUnit : Boolean
  1934. );
  1935. procedure AddLine(const ALine : ansistring);
  1936. var
  1937. buffer : ansistring;
  1938. begin
  1939. buffer := ALine + sLineBreak;
  1940. ADest.Write(buffer[1],Length(buffer));
  1941. end;
  1942. var
  1943. i : Integer;
  1944. locLine : string;
  1945. p : ^TNumericValue;
  1946. begin
  1947. if ACompleteUnit then begin
  1948. GenerateLicenceText(ADest);
  1949. AddLine('unit unicodenumtable;');
  1950. AddLine('interface');
  1951. AddLine('');
  1952. end;
  1953. AddLine('');
  1954. AddLine('const');
  1955. AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
  1956. AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
  1957. locLine := '';
  1958. p := @ANumList[0];
  1959. for i := Low(ANumList) to High(ANumList) - 1 do begin
  1960. locLine := locLine + FloatToStr(p^,FS) + ' ,';
  1961. if (i > 0) and ((i mod 8) = 0) then begin
  1962. AddLine(' ' + locLine);
  1963. locLine := '';
  1964. end;
  1965. Inc(p);
  1966. end;
  1967. locLine := locLine + FloatToStr(p^,FS);
  1968. AddLine(' ' + locLine);
  1969. AddLine(' );' + sLineBreak);
  1970. if ACompleteUnit then begin
  1971. AddLine('');
  1972. AddLine('implementation');
  1973. AddLine('');
  1974. AddLine('end.');
  1975. end;
  1976. end;
  1977. procedure GenerateDecompositionBookTable(
  1978. ADest : TStream;
  1979. const ABook : TDecompositionBook;
  1980. const AEndian : TEndianKind
  1981. );
  1982. procedure AddLine(const ALine : ansistring);
  1983. var
  1984. buffer : ansistring;
  1985. begin
  1986. buffer := ALine + sLineBreak;
  1987. ADest.Write(buffer[1],Length(buffer));
  1988. end;
  1989. var
  1990. i, k : Integer;
  1991. p : ^TDecompositionIndexRec;
  1992. cp : ^TUnicodeCodePoint;
  1993. cp24 : UInt24;
  1994. locLine : string;
  1995. begin
  1996. AddLine('const');
  1997. AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
  1998. AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
  1999. AddLine('type');
  2000. AddLine(' TDecompositionIndexRec = packed record');
  2001. AddLine(' StartPosition : Word;');
  2002. AddLine(' Length : Byte;');
  2003. AddLine(' end;');
  2004. AddLine(' TDecompositionBookRec = packed record');
  2005. AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
  2006. AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
  2007. AddLine(' end;');
  2008. AddLine('const');
  2009. AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
  2010. p := @ABook.Index[0];
  2011. AddLine(' Index : (// Index BEGIN');
  2012. k := 0;
  2013. locLine := ' ';
  2014. for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
  2015. locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
  2016. ' Length : ' + IntToStr(p^.Length) + '), ';
  2017. k := k + 1;
  2018. if (k >= 2) then begin
  2019. AddLine(locLine);
  2020. locLine := ' ';
  2021. k := 0;
  2022. end;
  2023. Inc(p);
  2024. end;
  2025. locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
  2026. ' Length : ' + IntToStr(p^.Length) + ')';
  2027. AddLine(locLine);
  2028. AddLine(' ); // Index END');
  2029. cp := @ABook.CodePoints[0];
  2030. AddLine(' CodePoints : (// CodePoints BEGIN');
  2031. k := 0;
  2032. locLine := ' ';
  2033. for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
  2034. cp24 := cp^;
  2035. locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
  2036. Inc(k);
  2037. if (k >= 16) then begin
  2038. AddLine(locLine);
  2039. k := 0;
  2040. locLine := ' ';
  2041. end;
  2042. Inc(cp);
  2043. end;
  2044. cp24 := cp^;
  2045. locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
  2046. AddLine(locLine);
  2047. AddLine(' ); // CodePoints END');
  2048. AddLine(' );' + sLineBreak);
  2049. end;
  2050. procedure GenerateOutBmpTable(
  2051. ADest : TStream;
  2052. const AList : TDataLineRecArray
  2053. );
  2054. procedure AddLine(const ALine : ansistring);
  2055. var
  2056. buffer : ansistring;
  2057. begin
  2058. buffer := ALine + sLineBreak;
  2059. ADest.Write(buffer[1],Length(buffer));
  2060. end;
  2061. var
  2062. i, j : Integer;
  2063. locLine : string;
  2064. p : PDataLineRec;
  2065. begin
  2066. AddLine('');
  2067. //AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
  2068. //AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
  2069. j := -1;
  2070. p := @AList[0];
  2071. for i := 0 to Length(AList) - 1 do begin
  2072. if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
  2073. (p^.StartCodePoint > $FFFF)
  2074. then begin
  2075. j := i;
  2076. Break;
  2077. end;
  2078. Inc(p);
  2079. end;
  2080. if (j < 0) then
  2081. exit;
  2082. for i := j to Length(AList) - 2 do begin
  2083. locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
  2084. ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
  2085. ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ;
  2086. AddLine(locLine);
  2087. Inc(p);
  2088. end;
  2089. locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
  2090. ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
  2091. ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ;
  2092. AddLine(locLine);
  2093. AddLine(' );' + sLineBreak);
  2094. end;
  2095. function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
  2096. var
  2097. k, i, locResLen : Integer;
  2098. q, p, pr : PDataLineRec;
  2099. k_end : TUnicodeCodePoint;
  2100. begin
  2101. locResLen := 1;
  2102. SetLength(Result,Length(AData));
  2103. FillChar(Result[0],Length(Result),#0);
  2104. Result[0] := AData[0];
  2105. q := @AData[0];
  2106. k := 0;
  2107. while (k < Length(AData)) do begin
  2108. if (q^.LineType = 0) then
  2109. k_end := q^.CodePoint
  2110. else
  2111. k_end := q^.EndCodePoint;
  2112. if ((k+1) = Length(AData)) then begin
  2113. i := k;
  2114. end else begin
  2115. p := @AData[k+1];
  2116. i := k +1;
  2117. while (i < (Length(AData) {- 1})) do begin
  2118. if (p^.PropID <> q^.PropID) then begin
  2119. i := i - 1;
  2120. Break;
  2121. end;
  2122. if (p^.LineType = 0) then begin
  2123. if (p^.CodePoint <> (k_end + 1)) then begin
  2124. i := i - 1;
  2125. Break;
  2126. end;
  2127. Inc(k_end);
  2128. end else begin
  2129. if (p^.StartCodePoint <> (k_end + 1)) then begin
  2130. i := i - 1;
  2131. Break;
  2132. end;
  2133. k_end := p^.EndCodePoint;
  2134. end;
  2135. Inc(i);
  2136. Inc(p);
  2137. end;
  2138. end;
  2139. {if (i = k) then begin
  2140. Result[locResLen] := q^;
  2141. Inc(locResLen);
  2142. end else begin }
  2143. p := @AData[i];
  2144. pr := @Result[locResLen];
  2145. pr^.PropID := q^.PropID;
  2146. if (q^.LineType = 0) then
  2147. pr^.StartCodePoint := q^.CodePoint
  2148. else
  2149. pr^.StartCodePoint := q^.StartCodePoint;
  2150. pr^.LineType := 1;
  2151. if (p^.LineType = 0) then
  2152. pr^.EndCodePoint := p^.CodePoint
  2153. else
  2154. pr^.EndCodePoint := p^.EndCodePoint;
  2155. Inc(locResLen);
  2156. //end;
  2157. k := i + 1;
  2158. if (k = Length(AData)) then
  2159. Break;
  2160. q := @AData[k];
  2161. end;
  2162. SetLength(Result,locResLen);
  2163. end;
  2164. procedure ParseUCAFile(
  2165. ADataAStream : TMemoryStream;
  2166. var ABook : TUCA_DataBook
  2167. );
  2168. const
  2169. LINE_LENGTH = 1024;
  2170. DATA_LENGTH = 25000;
  2171. var
  2172. p : PAnsiChar;
  2173. actualDataLen : Integer;
  2174. bufferLength, bufferPos, lineLength, linePos : Integer;
  2175. line : ansistring;
  2176. function NextLine() : Boolean;
  2177. var
  2178. locOldPos : Integer;
  2179. locOldPointer : PAnsiChar;
  2180. begin
  2181. Result := False;
  2182. locOldPointer := p;
  2183. locOldPos := bufferPos;
  2184. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  2185. Inc(p);
  2186. Inc(bufferPos);
  2187. end;
  2188. if (locOldPos = bufferPos) and (p^ = #10) then begin
  2189. lineLength := 0;
  2190. Inc(p);
  2191. Inc(bufferPos);
  2192. linePos := 1;
  2193. Result := True;
  2194. end else if (locOldPos < bufferPos) then begin
  2195. lineLength := (bufferPos - locOldPos) + 1;
  2196. Move(locOldPointer^,line[1],lineLength);
  2197. if (p^ = #10) then begin
  2198. Dec(lineLength);
  2199. Inc(p);
  2200. Inc(bufferPos);
  2201. end;
  2202. linePos := 1;
  2203. Result := True;
  2204. end;
  2205. end;
  2206. procedure SkipSpace();
  2207. begin
  2208. while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
  2209. Inc(linePos);
  2210. end;
  2211. function NextToken() : ansistring;
  2212. const C_SEPARATORS = [';','#','.','[',']','*','@'];
  2213. var
  2214. k : Integer;
  2215. begin
  2216. SkipSpace();
  2217. k := linePos;
  2218. if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
  2219. Result := line[linePos];
  2220. Inc(linePos);
  2221. exit;
  2222. end;
  2223. while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
  2224. Inc(linePos);
  2225. if (linePos > k) then begin
  2226. if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
  2227. Result := Copy(line,k,(linePos-k))
  2228. else
  2229. Result := Copy(line,k,(linePos-k+1));
  2230. Result := Trim(Result);
  2231. end else begin
  2232. Result := '';
  2233. end;
  2234. end;
  2235. procedure CheckToken(const AToken : string);
  2236. var
  2237. a, b : string;
  2238. begin
  2239. a := LowerCase(Trim(AToken));
  2240. b := LowerCase(Trim(NextToken()));
  2241. if (a <> b) then
  2242. raise Exception.CreateFmt('Expected token "%s" but found "%s".',[a,b]);
  2243. end;
  2244. function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
  2245. var
  2246. s :AnsiString;
  2247. k : Integer;
  2248. begin
  2249. Result := False;
  2250. s := NextToken();
  2251. if (s <> '[') then
  2252. exit;
  2253. s := NextToken();
  2254. if (s = '.') then
  2255. ADest.Variable := False
  2256. else begin
  2257. if (s <> '*') then
  2258. raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
  2259. ADest.Variable := True;
  2260. end;
  2261. ADest.Weights[0] := StrToInt('$'+NextToken());
  2262. for k := 1 to 3 do begin
  2263. CheckToken('.');
  2264. ADest.Weights[k] := StrToInt('$'+NextToken());
  2265. end;
  2266. CheckToken(']');
  2267. Result := True;
  2268. end;
  2269. procedure ParseHeaderVar();
  2270. var
  2271. s,ss : string;
  2272. k : Integer;
  2273. begin
  2274. s := NextToken();
  2275. if (s = 'version') then begin
  2276. ss := '';
  2277. while True do begin
  2278. s := NextToken();
  2279. if (s = '') then
  2280. Break;
  2281. ss := ss + s;
  2282. end;
  2283. ABook.Version := ss;
  2284. end else if (s = 'variable') then begin
  2285. if (s = 'blanked') then
  2286. ABook.VariableWeight := ucaBlanked
  2287. else if (s = 'non-ignorable') then
  2288. ABook.VariableWeight := ucaNonIgnorable
  2289. else if (s = 'shifted') then
  2290. ABook.VariableWeight := ucaShifted
  2291. else if (s = 'shift-trimmed') then
  2292. ABook.VariableWeight := ucaShiftedTrimmed
  2293. else if (s = 'ignoresp') then
  2294. ABook.VariableWeight := ucaIgnoreSP
  2295. else
  2296. raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
  2297. end else if (s = 'backwards') or (s = 'forwards') then begin
  2298. ss := s;
  2299. s := NextToken();
  2300. k := StrToInt(s);
  2301. if (k < 1) or (k > 4) then
  2302. raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
  2303. ABook.Backwards[k] := (s = 'backwards');
  2304. end;
  2305. end;
  2306. procedure ParseLine();
  2307. var
  2308. locData : ^TUCA_LineRec;
  2309. s : ansistring;
  2310. kc : Integer;
  2311. begin
  2312. if (Length(ABook.Lines) <= actualDataLen) then
  2313. SetLength(ABook.Lines,Length(ABook.Lines)*2);
  2314. locData := @ABook.Lines[actualDataLen];
  2315. s := NextToken();
  2316. if (s = '') or (s[1] = '#') then
  2317. exit;
  2318. if (s[1] = '@') then begin
  2319. ParseHeaderVar();
  2320. exit;
  2321. end;
  2322. SetLength(locData^.CodePoints,10);
  2323. locData^.CodePoints[0] := StrToInt('$'+s);
  2324. kc := 1;
  2325. while True do begin
  2326. s := Trim(NextToken());
  2327. if (s = '') then
  2328. exit;
  2329. if (s = ';') then
  2330. Break;
  2331. locData^.CodePoints[kc] := StrToInt('$'+s);
  2332. Inc(kc);
  2333. end;
  2334. if (kc = 0) then
  2335. exit;
  2336. SetLength(locData^.CodePoints,kc);
  2337. SetLength(locData^.Weights,24);
  2338. kc := 0;
  2339. while ReadWeightBlock(locData^.Weights[kc]) do begin
  2340. Inc(kc);
  2341. end;
  2342. SetLength(locData^.Weights,kc);
  2343. Inc(actualDataLen);
  2344. end;
  2345. procedure Prepare();
  2346. var
  2347. k : Integer;
  2348. begin
  2349. ABook.VariableWeight := ucaShifted;
  2350. for k := Low(ABook.Backwards) to High(ABook.Backwards) do
  2351. ABook.Backwards[k] := False;
  2352. SetLength(ABook.Lines,DATA_LENGTH);
  2353. actualDataLen := 0;
  2354. bufferLength := ADataAStream.Size;
  2355. bufferPos := 0;
  2356. p := ADataAStream.Memory;
  2357. lineLength := 0;
  2358. SetLength(line,LINE_LENGTH);
  2359. end;
  2360. begin
  2361. Prepare();
  2362. while NextLine() do
  2363. ParseLine();
  2364. SetLength(ABook.Lines,actualDataLen);
  2365. end;
  2366. procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
  2367. var
  2368. i : Integer;
  2369. begin
  2370. Write(ATitle, ' ');
  2371. for i := 0 to Length(X) - 1 do
  2372. Write(X[i],' ');
  2373. WriteLn();
  2374. end;
  2375. function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
  2376. var
  2377. i, hb : Integer;
  2378. begin
  2379. if (A=B) then
  2380. exit(0);
  2381. Result := 1;
  2382. hb := Length(B^.CodePoints) - 1;
  2383. for i := 0 to Length(A^.CodePoints) - 1 do begin
  2384. if (i > hb) then
  2385. exit;
  2386. if (A^.CodePoints[i] < B^.CodePoints[i]) then
  2387. exit(-1);
  2388. if (A^.CodePoints[i] > B^.CodePoints[i]) then
  2389. exit(1);
  2390. end;
  2391. if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
  2392. exit(0);
  2393. exit(-1);
  2394. end;
  2395. Procedure QuickSort(var AList: TUCA_DataBookIndex; L, R : Longint;
  2396. ABook : PUCA_DataBook);
  2397. var
  2398. I, J : Longint;
  2399. P, Q : Integer;
  2400. begin
  2401. repeat
  2402. I := L;
  2403. J := R;
  2404. P := AList[ (L + R) div 2 ];
  2405. repeat
  2406. while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
  2407. I := I + 1;
  2408. while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
  2409. J := J - 1;
  2410. If I <= J then
  2411. begin
  2412. Q := AList[I];
  2413. AList[I] := AList[J];
  2414. AList[J] := Q;
  2415. I := I + 1;
  2416. J := J - 1;
  2417. end;
  2418. until I > J;
  2419. // sort the smaller range recursively
  2420. // sort the bigger range via the loop
  2421. // Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
  2422. if J - L < R - I then
  2423. begin
  2424. if L < J then
  2425. QuickSort(AList, L, J, ABook);
  2426. L := I;
  2427. end
  2428. else
  2429. begin
  2430. if I < R then
  2431. QuickSort(AList, I, R, ABook);
  2432. R := J;
  2433. end;
  2434. until L >= R;
  2435. end;
  2436. function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
  2437. var
  2438. r : TUCA_DataBookIndex;
  2439. i, c : Integer;
  2440. begin
  2441. c := Length(ABook^.Lines);
  2442. SetLength(r,c);
  2443. for i := 0 to c - 1 do
  2444. r[i] := i;
  2445. QuickSort(r,0,c-1,ABook);
  2446. Result := r;
  2447. end;
  2448. function ConstructContextTree(
  2449. const AContext : PUCA_LineContextRec;
  2450. var ADestBuffer;
  2451. const ADestBufferLength : Cardinal
  2452. ) : PUCA_PropItemContextTreeRec;forward;
  2453. function ConstructItem(
  2454. AItem : PUCA_PropItemRec;
  2455. ACodePoint : Cardinal;
  2456. AValid : Byte;
  2457. AChildCount : Byte;
  2458. const AWeights : array of TUCA_WeightRec;
  2459. const AStoreCP : Boolean;
  2460. const AContext : PUCA_LineContextRec;
  2461. const ADeleted : Boolean
  2462. ) : Cardinal;
  2463. var
  2464. i : Integer;
  2465. p : PUCA_PropItemRec;
  2466. pw : PUCA_PropWeights;
  2467. pb : PByte;
  2468. hasContext : Boolean;
  2469. contextTree : PUCA_PropItemContextTreeRec;
  2470. wl : Integer;
  2471. begin
  2472. p := AItem;
  2473. p^.Size := 0;
  2474. p^.Flags := 0;
  2475. p^.WeightLength := 0;
  2476. SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
  2477. p^.ChildCount := AChildCount;
  2478. hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
  2479. if hasContext then
  2480. wl := 0
  2481. else
  2482. wl := Length(AWeights);
  2483. p^.WeightLength := wl;
  2484. if (wl = 0) then begin
  2485. Result := SizeOf(TUCA_PropItemRec);
  2486. if ADeleted then
  2487. SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
  2488. end else begin
  2489. Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
  2490. pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
  2491. PWord(pb)^ := AWeights[0].Weights[0];
  2492. pb := pb + 2;
  2493. if (AWeights[0].Weights[1] > High(Byte)) then begin
  2494. PWord(pb)^ := AWeights[0].Weights[1];
  2495. pb := pb + 2;
  2496. end else begin
  2497. SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
  2498. pb^ := AWeights[0].Weights[1];
  2499. pb := pb + 1;
  2500. Result := Result - 1;
  2501. end;
  2502. if (AWeights[0].Weights[2] > High(Byte)) then begin
  2503. PWord(pb)^ := AWeights[0].Weights[2];
  2504. pb := pb + 2;
  2505. end else begin
  2506. SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
  2507. pb^ := AWeights[0].Weights[2];
  2508. pb := pb + 1;
  2509. Result := Result - 1;
  2510. end;
  2511. pw := PUCA_PropWeights(pb);
  2512. for i := 1 to wl - 1 do begin
  2513. pw^.Weights[0] := AWeights[i].Weights[0];
  2514. pw^.Weights[1] := AWeights[i].Weights[1];
  2515. pw^.Weights[2] := AWeights[i].Weights[2];
  2516. //pw^.Variable := BoolToByte(AWeights[i].Variable);
  2517. Inc(pw);
  2518. end;
  2519. end;
  2520. hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
  2521. if AStoreCP or hasContext then begin
  2522. PUInt24(PtrUInt(AItem)+Result)^ := ACodePoint;
  2523. Result := Result + SizeOf(UInt24);
  2524. SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
  2525. end;
  2526. if hasContext then begin
  2527. contextTree := ConstructContextTree(AContext,Pointer(PtrUInt(AItem)+Result)^,MaxInt);
  2528. Result := Result + Cardinal(contextTree^.Size);
  2529. SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
  2530. end;
  2531. p^.Size := Result;
  2532. end;
  2533. function CalcCharChildCount(
  2534. const ASearchStartPos : Integer;
  2535. const ALinePos : Integer;
  2536. const ABookLines : PUCA_LineRec;
  2537. const AMaxLength : Integer;
  2538. const ABookIndex : TUCA_DataBookIndex;
  2539. out ALineCount : Word
  2540. ) : Byte;
  2541. var
  2542. locLinePos : Integer;
  2543. p : PUCA_LineRec;
  2544. procedure IncP();
  2545. begin
  2546. Inc(locLinePos);
  2547. p := @ABookLines[ABookIndex[locLinePos]];
  2548. end;
  2549. var
  2550. i, locTargetLen, locTargetBufferSize, r : Integer;
  2551. locTarget : array[0..127] of Cardinal;
  2552. locLastChar : Cardinal;
  2553. begin
  2554. locLinePos := ALinePos;
  2555. p := @ABookLines[ABookIndex[locLinePos]];
  2556. locTargetLen := ASearchStartPos;
  2557. locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
  2558. Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
  2559. if (Length(p^.CodePoints) = ASearchStartPos) then begin
  2560. r := 0;
  2561. locLastChar := High(Cardinal);
  2562. end else begin
  2563. r := 1;
  2564. locLastChar := p^.CodePoints[ASearchStartPos];
  2565. end;
  2566. i := 1;
  2567. while (i < AMaxLength) do begin
  2568. IncP();
  2569. if (Length(p^.CodePoints) < locTargetLen) then
  2570. Break;
  2571. if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
  2572. Break;
  2573. if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
  2574. Inc(r);
  2575. locLastChar := p^.CodePoints[ASearchStartPos];
  2576. end;
  2577. Inc(i);
  2578. end;
  2579. ALineCount := i;
  2580. Result := r;
  2581. end;
  2582. function BuildTrie(
  2583. const ALinePos : Integer;
  2584. const ABookLines : PUCA_LineRec;
  2585. const AMaxLength : Integer;
  2586. const ABookIndex : TUCA_DataBookIndex
  2587. ) : PTrieNode;
  2588. var
  2589. p : PUCA_LineRec;
  2590. root : PTrieNode;
  2591. ki, k, i : Integer;
  2592. key : array of TKeyType;
  2593. begin
  2594. k := ABookIndex[ALinePos];
  2595. p := @ABookLines[k];
  2596. if (Length(p^.CodePoints) = 1) then
  2597. root := CreateNode(p^.CodePoints[0],k)
  2598. else
  2599. root := CreateNode(p^.CodePoints[0]);
  2600. for i := ALinePos to ALinePos + AMaxLength - 1 do begin
  2601. k := ABookIndex[i];
  2602. p := @ABookLines[k];
  2603. if (Length(p^.CodePoints) = 1) then begin
  2604. InsertWord(root,p^.CodePoints[0],k);
  2605. end else begin
  2606. SetLength(key,Length(p^.CodePoints));
  2607. for ki := 0 to Length(p^.CodePoints) - 1 do
  2608. key[ki] := p^.CodePoints[ki];
  2609. InsertWord(root,key,k);
  2610. end;
  2611. end;
  2612. Result := root;
  2613. end;
  2614. function BoolToByte(AValue : Boolean): Byte;inline;
  2615. begin
  2616. if AValue then
  2617. Result := 1
  2618. else
  2619. Result := 0;
  2620. end;
  2621. function InternalConstructFromTrie(
  2622. const ATrie : PTrieNode;
  2623. const AItem : PUCA_PropItemRec;
  2624. const ALines : PUCA_LineRec;
  2625. const AStoreCp : Boolean
  2626. ) : Cardinal;
  2627. var
  2628. i : Integer;
  2629. size : Cardinal;
  2630. p : PUCA_PropItemRec;
  2631. n : PTrieNode;
  2632. begin
  2633. if (ATrie = nil) then
  2634. exit(0);
  2635. p := AItem;
  2636. n := ATrie;
  2637. if n^.DataNode then
  2638. size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
  2639. else
  2640. size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
  2641. Result := size;
  2642. if (n^.ChildCount > 0) then begin
  2643. for i := 0 to n^.ChildCount - 1 do begin
  2644. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2645. size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
  2646. Result := Result + size;
  2647. end;
  2648. end;
  2649. AItem^.Size := Result;
  2650. end;
  2651. function ConstructFromTrie(
  2652. const ATrie : PTrieNode;
  2653. const AItem : PUCA_PropItemRec;
  2654. const ALines : PUCA_LineRec
  2655. ) : Integer;
  2656. begin
  2657. Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
  2658. end;
  2659. procedure MakeUCA_Props(
  2660. ABook : PUCA_DataBook;
  2661. out AProps : PUCA_PropBook
  2662. );
  2663. var
  2664. propIndexCount : Integer;
  2665. procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
  2666. begin
  2667. AProps^.Index[propIndexCount].CodePoint := ACodePoint;
  2668. AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
  2669. propIndexCount := propIndexCount + 1;
  2670. end;
  2671. var
  2672. locIndex : TUCA_DataBookIndex;
  2673. i, c, k, kc : Integer;
  2674. p, p1, p2 : PUCA_PropItemRec;
  2675. lines, pl1, pl2 : PUCA_LineRec;
  2676. childCount, lineCount : Word;
  2677. size : Cardinal;
  2678. trieRoot : PTrieNode;
  2679. MaxChildCount, MaxSize : Cardinal;
  2680. childList : array of PUCA_PropItemRec;
  2681. begin
  2682. locIndex := CreateIndex(ABook);
  2683. i := Length(ABook^.Lines);
  2684. i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
  2685. AProps := AllocMem(SizeOf(TUCA_DataBook));
  2686. AProps^.ItemSize := i;
  2687. AProps^.Items := AllocMem(i);
  2688. propIndexCount := 0;
  2689. SetLength(AProps^.Index,Length(ABook^.Lines));
  2690. p := AProps^.Items;
  2691. lines := @ABook^.Lines[0];
  2692. c := Length(locIndex);
  2693. i := 0;
  2694. MaxChildCount := 0; MaxSize := 0;
  2695. while (i < (c-1)) do begin
  2696. pl1 := @lines[locIndex[i]];
  2697. if not pl1^.Stored then begin
  2698. i := i + 1;
  2699. Continue;
  2700. end;
  2701. pl2 := @lines[locIndex[i+1]];
  2702. if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
  2703. if (Length(pl1^.CodePoints) = 1) then begin
  2704. size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
  2705. CapturePropIndex(p,pl1^.CodePoints[0]);
  2706. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2707. if (size > MaxSize) then
  2708. MaxSize := size;
  2709. end else begin
  2710. kc := Length(pl1^.CodePoints);
  2711. SetLength(childList,kc);
  2712. for k := 0 to kc - 2 do begin
  2713. size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
  2714. if (k = 0) then
  2715. CapturePropIndex(p,pl1^.CodePoints[k]);
  2716. childList[k] := p;
  2717. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2718. end;
  2719. size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
  2720. childList[kc-1] := p;
  2721. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2722. for k := kc - 2 downto 0 do begin
  2723. p1 := childList[k];
  2724. p2 := childList[k+1];
  2725. p1^.Size := p1^.Size + p2^.Size;
  2726. end;
  2727. if (p1^.Size > MaxSize) then
  2728. MaxSize := p1^.Size;
  2729. end;
  2730. lineCount := 1;
  2731. end else begin
  2732. childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
  2733. if (childCount < 1) then
  2734. raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
  2735. if (lineCount < 2) then
  2736. raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
  2737. if (childCount > MaxChildCount) then
  2738. MaxChildCount := childCount;
  2739. trieRoot := BuildTrie(i,lines,lineCount,locIndex);
  2740. size := ConstructFromTrie(trieRoot,p,lines);
  2741. CapturePropIndex(p,pl1^.CodePoints[0]);
  2742. FreeNode(trieRoot);
  2743. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2744. if (size > MaxSize) then
  2745. MaxSize := size;
  2746. end;
  2747. i := i + lineCount;
  2748. end;
  2749. if (i = (c-1)) then begin
  2750. pl1 := @lines[locIndex[i]];
  2751. if (Length(pl1^.CodePoints) = 1) then begin
  2752. size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
  2753. CapturePropIndex(p,pl1^.CodePoints[0]);
  2754. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2755. if (size > MaxSize) then
  2756. MaxSize := size;
  2757. end else begin
  2758. kc := Length(pl1^.CodePoints);
  2759. SetLength(childList,kc);
  2760. for k := 0 to kc - 2 do begin
  2761. size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
  2762. if (k = 0) then
  2763. CapturePropIndex(p,pl1^.CodePoints[0]);
  2764. childList[k] := p;
  2765. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2766. end;
  2767. size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
  2768. childList[kc-1] := p;
  2769. p := PUCA_PropItemRec(PtrUInt(p) + size);
  2770. for i := kc - 2 downto 0 do begin
  2771. p1 := childList[i];
  2772. p2 := childList[i+1];
  2773. p1^.Size := p1^.Size + p2^.Size;
  2774. end;
  2775. if (size > MaxSize) then
  2776. MaxSize := size;
  2777. end;
  2778. end;
  2779. c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
  2780. ReAllocMem(AProps^.Items,c);
  2781. AProps^.ItemSize := c;
  2782. SetLength(AProps^.Index,propIndexCount);
  2783. AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
  2784. ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
  2785. k := 0;
  2786. c := High(Word);
  2787. for i := 0 to Length(ABook^.Lines) - 1 do begin
  2788. if (Length(ABook^.Lines[i].Weights) > 0) then begin
  2789. if (ABook^.Lines[i].Weights[0].Variable) then begin
  2790. if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
  2791. k := ABook^.Lines[i].Weights[0].Weights[0];
  2792. if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
  2793. c := ABook^.Lines[i].Weights[0].Weights[0];
  2794. end;
  2795. end;
  2796. end;
  2797. AProps^.VariableHighLimit := k;
  2798. AProps^.VariableLowLimit := c;
  2799. end;
  2800. procedure FreeUcaBook(var ABook : PUCA_PropBook);
  2801. var
  2802. p : PUCA_PropBook;
  2803. begin
  2804. if (ABook = nil) then
  2805. exit;
  2806. p := ABook;
  2807. ABook := nil;
  2808. p^.Index := nil;
  2809. FreeMem(p^.Items,p^.ItemSize);
  2810. FreeMem(p,SizeOf(p^));
  2811. end;
  2812. function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
  2813. var
  2814. i : Integer;
  2815. begin
  2816. for i := 0 to Length(APropBook^.Index) - 1 do begin
  2817. if (ACodePoint = APropBook^.Index[i].CodePoint) then
  2818. exit(i);
  2819. end;
  2820. Result := -1;
  2821. end;
  2822. type
  2823. PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
  2824. function IndexOf(
  2825. const AItem : PucaBmpSecondTableItem;
  2826. const ATable : TucaBmpSecondTable;
  2827. const ATableActualLength : Integer
  2828. ) : Integer;overload;
  2829. var
  2830. i : Integer;
  2831. p : PucaBmpSecondTableItem;
  2832. begin
  2833. Result := -1;
  2834. if (ATableActualLength > 0) then begin
  2835. p := @ATable[0];
  2836. for i := 0 to ATableActualLength - 1 do begin
  2837. if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
  2838. Result := i;
  2839. Break;
  2840. end;
  2841. Inc(p);
  2842. end;
  2843. end;
  2844. end;
  2845. procedure MakeUCA_BmpTables(
  2846. var AFirstTable : TucaBmpFirstTable;
  2847. var ASecondTable : TucaBmpSecondTable;
  2848. const APropBook : PUCA_PropBook
  2849. );
  2850. var
  2851. locLowByte, locHighByte : Byte;
  2852. locTableItem : TucaBmpSecondTableItem;
  2853. locCP : TUnicodeCodePoint;
  2854. i, locSecondActualLen : Integer;
  2855. k : Integer;
  2856. begin
  2857. SetLength(ASecondTable,120);
  2858. locSecondActualLen := 0;
  2859. for locHighByte := 0 to 255 do begin
  2860. FillChar(locTableItem,SizeOf(locTableItem),#0);
  2861. for locLowByte := 0 to 255 do begin
  2862. locCP := (locHighByte * 256) + locLowByte;
  2863. k := IndexOf(locCP,APropBook);
  2864. if (k = -1) then
  2865. k := 0
  2866. else
  2867. k := APropBook^.Index[k].Position + 1;
  2868. locTableItem[locLowByte] := k;
  2869. end;
  2870. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  2871. if (i = -1) then begin
  2872. if (locSecondActualLen = Length(ASecondTable)) then
  2873. SetLength(ASecondTable,locSecondActualLen + 50);
  2874. i := locSecondActualLen;
  2875. ASecondTable[i] := locTableItem;
  2876. Inc(locSecondActualLen);
  2877. end;
  2878. AFirstTable[locHighByte] := i;
  2879. end;
  2880. SetLength(ASecondTable,locSecondActualLen);
  2881. end;
  2882. function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
  2883. begin
  2884. //copied from utf16toutf32
  2885. Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
  2886. end;
  2887. procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
  2888. begin
  2889. AHighS := Word((AValue - $10000) shr 10 + $d800);
  2890. ALowS := Word((AValue - $10000) and $3ff + $dc00);
  2891. end;
  2892. type
  2893. PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
  2894. function IndexOf(
  2895. const AItem : PucaOBmpSecondTableItem;
  2896. const ATable : TucaOBmpSecondTable;
  2897. const ATableActualLength : Integer
  2898. ) : Integer;overload;
  2899. var
  2900. i : Integer;
  2901. p : PucaOBmpSecondTableItem;
  2902. begin
  2903. Result := -1;
  2904. if (ATableActualLength > 0) then begin
  2905. p := @ATable[0];
  2906. for i := 0 to ATableActualLength - 1 do begin
  2907. if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
  2908. Result := i;
  2909. Break;
  2910. end;
  2911. Inc(p);
  2912. end;
  2913. end;
  2914. end;
  2915. procedure MakeUCA_OBmpTables(
  2916. var AFirstTable : TucaOBmpFirstTable;
  2917. var ASecondTable : TucaOBmpSecondTable;
  2918. const APropBook : PUCA_PropBook
  2919. );
  2920. var
  2921. locLowByte, locHighByte : Word;
  2922. locTableItem : TucaOBmpSecondTableItem;
  2923. locCP : TUnicodeCodePoint;
  2924. i, locSecondActualLen : Integer;
  2925. k : Integer;
  2926. begin
  2927. if (Length(ASecondTable) = 0) then
  2928. SetLength(ASecondTable,2000);
  2929. locSecondActualLen := 0;
  2930. for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
  2931. FillChar(locTableItem,SizeOf(locTableItem),#0);
  2932. for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
  2933. locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
  2934. k := IndexOf(locCP,APropBook);
  2935. if (k = -1) then
  2936. k := 0
  2937. else
  2938. k := APropBook^.Index[k].Position + 1;
  2939. locTableItem[locLowByte] := k;
  2940. end;
  2941. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  2942. if (i = -1) then begin
  2943. if (locSecondActualLen = Length(ASecondTable)) then
  2944. SetLength(ASecondTable,locSecondActualLen + 50);
  2945. i := locSecondActualLen;
  2946. ASecondTable[i] := locTableItem;
  2947. Inc(locSecondActualLen);
  2948. end;
  2949. AFirstTable[locHighByte] := i;
  2950. end;
  2951. SetLength(ASecondTable,locSecondActualLen);
  2952. end;
  2953. function GetPropPosition(
  2954. const AHighS,
  2955. ALowS : Word;
  2956. const AFirstTable : PucaOBmpFirstTable;
  2957. const ASecondTable : PucaOBmpSecondTable
  2958. ): Integer;inline;overload;
  2959. begin
  2960. Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
  2961. end;
  2962. procedure GenerateUCA_Head(
  2963. ADest : TStream;
  2964. ABook : PUCA_DataBook;
  2965. AProps : PUCA_PropBook
  2966. );
  2967. procedure AddLine(const ALine : ansistring);
  2968. var
  2969. buffer : ansistring;
  2970. begin
  2971. buffer := ALine + sLineBreak;
  2972. ADest.Write(buffer[1],Length(buffer));
  2973. end;
  2974. begin
  2975. AddLine('const');
  2976. AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
  2977. AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
  2978. AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
  2979. AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
  2980. AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
  2981. AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
  2982. AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
  2983. AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
  2984. AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
  2985. AddLine('');
  2986. end;
  2987. procedure GenerateUCA_BmpTables(
  2988. AStream,
  2989. ANativeEndianStream,
  2990. ANonNativeEndianStream : TStream;
  2991. var AFirstTable : TucaBmpFirstTable;
  2992. var ASecondTable : TucaBmpSecondTable
  2993. );
  2994. procedure AddLine(AOut : TStream; const ALine : ansistring);
  2995. var
  2996. buffer : ansistring;
  2997. begin
  2998. buffer := ALine + sLineBreak;
  2999. AOut.Write(buffer[1],Length(buffer));
  3000. end;
  3001. var
  3002. i, j, c : Integer;
  3003. locLine : string;
  3004. value : UInt24;
  3005. begin
  3006. AddLine(AStream,'const');
  3007. AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = (');
  3008. locLine := '';
  3009. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3010. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3011. if (((i+1) mod 16) = 0) then begin
  3012. locLine := ' ' + locLine;
  3013. AddLine(AStream,locLine);
  3014. locLine := '';
  3015. end;
  3016. end;
  3017. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3018. locLine := ' ' + locLine;
  3019. AddLine(AStream,locLine);
  3020. AddLine(AStream,' );' + sLineBreak);
  3021. AddLine(ANativeEndianStream,'const');
  3022. AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3023. c := High(ASecondTable);
  3024. for i := Low(ASecondTable) to c do begin
  3025. locLine := '';
  3026. for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
  3027. value := ASecondTable[i][j];
  3028. locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
  3029. if (((j+1) mod 2) = 0) then begin
  3030. if (i = c) and (j = 255) then
  3031. Delete(locLine,Length(locLine),1);
  3032. locLine := ' ' + locLine;
  3033. AddLine(ANativeEndianStream,locLine);
  3034. locLine := '';
  3035. end;
  3036. end;
  3037. end;
  3038. AddLine(ANativeEndianStream,' );' + sLineBreak);
  3039. AddLine(ANonNativeEndianStream,'const');
  3040. AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3041. c := High(ASecondTable);
  3042. for i := Low(ASecondTable) to c do begin
  3043. locLine := '';
  3044. for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
  3045. value := ASecondTable[i][j];
  3046. locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
  3047. if (((j+1) mod 2) = 0) then begin
  3048. if (i = c) and (j = 255) then
  3049. Delete(locLine,Length(locLine),1);
  3050. locLine := ' ' + locLine;
  3051. AddLine(ANonNativeEndianStream,locLine);
  3052. locLine := '';
  3053. end;
  3054. end;
  3055. end;
  3056. AddLine(ANonNativeEndianStream,' );' + sLineBreak);
  3057. end;
  3058. procedure GenerateUCA_PropTable(
  3059. // WARNING : files must be generated for each endianess (Little / Big)
  3060. ADest : TStream;
  3061. const APropBook : PUCA_PropBook;
  3062. const AEndian : TEndianKind
  3063. );
  3064. procedure AddLine(const ALine : ansistring);
  3065. var
  3066. buffer : ansistring;
  3067. begin
  3068. buffer := ALine + sLineBreak;
  3069. ADest.Write(buffer[1],Length(buffer));
  3070. end;
  3071. var
  3072. i, c : Integer;
  3073. locLine : string;
  3074. p : PByte;
  3075. begin
  3076. c := APropBook^.ItemSize;
  3077. AddLine('const');
  3078. AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
  3079. locLine := '';
  3080. if (AEndian = ENDIAN_NATIVE) then
  3081. p := PByte(APropBook^.Items)
  3082. else
  3083. p := PByte(APropBook^.ItemsOtherEndian);
  3084. for i := 0 to c - 2 do begin
  3085. locLine := locLine + IntToStr(p[i]) + ',';
  3086. if (((i+1) mod 60) = 0) then begin
  3087. locLine := ' ' + locLine;
  3088. AddLine(locLine);
  3089. locLine := '';
  3090. end;
  3091. end;
  3092. locLine := locLine + IntToStr(p[c-1]);
  3093. locLine := ' ' + locLine;
  3094. AddLine(locLine);
  3095. AddLine(' );' + sLineBreak);
  3096. end;
  3097. procedure GenerateUCA_OBmpTables(
  3098. AStream,
  3099. ANativeEndianStream,
  3100. ANonNativeEndianStream : TStream;
  3101. var AFirstTable : TucaOBmpFirstTable;
  3102. var ASecondTable : TucaOBmpSecondTable
  3103. );
  3104. procedure AddLine(AOut : TStream; const ALine : ansistring);
  3105. var
  3106. buffer : ansistring;
  3107. begin
  3108. buffer := ALine + sLineBreak;
  3109. AOut.Write(buffer[1],Length(buffer));
  3110. end;
  3111. var
  3112. i, j, c : Integer;
  3113. locLine : string;
  3114. value : UInt24;
  3115. begin
  3116. AddLine(AStream,'const');
  3117. AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
  3118. locLine := '';
  3119. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3120. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3121. if (((i+1) mod 16) = 0) then begin
  3122. locLine := ' ' + locLine;
  3123. AddLine(AStream,locLine);
  3124. locLine := '';
  3125. end;
  3126. end;
  3127. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3128. locLine := ' ' + locLine;
  3129. AddLine(AStream,locLine);
  3130. AddLine(AStream,' );' + sLineBreak);
  3131. AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3132. c := High(ASecondTable);
  3133. for i := Low(ASecondTable) to c do begin
  3134. locLine := '';
  3135. for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
  3136. value := ASecondTable[i][j];
  3137. locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
  3138. if (((j+1) mod 2) = 0) then begin
  3139. if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
  3140. Delete(locLine,Length(locLine),1);
  3141. locLine := ' ' + locLine;
  3142. AddLine(ANativeEndianStream,locLine);
  3143. locLine := '';
  3144. end;
  3145. end;
  3146. end;
  3147. AddLine(ANativeEndianStream,' );' + sLineBreak);
  3148. AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
  3149. c := High(ASecondTable);
  3150. for i := Low(ASecondTable) to c do begin
  3151. locLine := '';
  3152. for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
  3153. value := ASecondTable[i][j];
  3154. locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
  3155. if (((j+1) mod 2) = 0) then begin
  3156. if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
  3157. Delete(locLine,Length(locLine),1);
  3158. locLine := ' ' + locLine;
  3159. AddLine(ANonNativeEndianStream,locLine);
  3160. locLine := '';
  3161. end;
  3162. end;
  3163. end;
  3164. AddLine(ANonNativeEndianStream,' );' + sLineBreak);
  3165. end;
  3166. //-------------------------------------------
  3167. type
  3168. POBmpSecondTableItem = ^TOBmpSecondTableItem;
  3169. function IndexOf(
  3170. const AItem : POBmpSecondTableItem;
  3171. const ATable : TOBmpSecondTable;
  3172. const ATableActualLength : Integer
  3173. ) : Integer;overload;
  3174. var
  3175. i : Integer;
  3176. p : POBmpSecondTableItem;
  3177. begin
  3178. Result := -1;
  3179. if (ATableActualLength > 0) then begin
  3180. p := @ATable[0];
  3181. for i := 0 to ATableActualLength - 1 do begin
  3182. if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
  3183. Result := i;
  3184. Break;
  3185. end;
  3186. Inc(p);
  3187. end;
  3188. end;
  3189. end;
  3190. procedure MakeOBmpTables(
  3191. var AFirstTable : TOBmpFirstTable;
  3192. var ASecondTable : TOBmpSecondTable;
  3193. const ADataLineList : TDataLineRecArray
  3194. );
  3195. var
  3196. locLowByte, locHighByte : Word;
  3197. locTableItem : TOBmpSecondTableItem;
  3198. locCP : TUnicodeCodePoint;
  3199. i, locSecondActualLen : Integer;
  3200. begin
  3201. SetLength(ASecondTable,2000);
  3202. locSecondActualLen := 0;
  3203. for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
  3204. FillChar(locTableItem,SizeOf(locTableItem),#0);
  3205. for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
  3206. locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
  3207. locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
  3208. end;
  3209. i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
  3210. if (i = -1) then begin
  3211. if (locSecondActualLen = Length(ASecondTable)) then
  3212. SetLength(ASecondTable,locSecondActualLen + 50);
  3213. i := locSecondActualLen;
  3214. ASecondTable[i] := locTableItem;
  3215. Inc(locSecondActualLen);
  3216. end;
  3217. AFirstTable[locHighByte] := i;
  3218. end;
  3219. SetLength(ASecondTable,locSecondActualLen);
  3220. end;
  3221. type
  3222. P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
  3223. function IndexOf(
  3224. const AItem : P3lvlOBmp3TableItem;
  3225. const ATable : T3lvlOBmp3Table;
  3226. const ATableActualLength : Integer
  3227. ) : Integer;overload;
  3228. var
  3229. i : Integer;
  3230. p : P3lvlOBmp3TableItem;
  3231. begin
  3232. Result := -1;
  3233. if (ATableActualLength > 0) then begin
  3234. p := @ATable[0];
  3235. for i := 0 to ATableActualLength - 1 do begin
  3236. if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
  3237. Result := i;
  3238. Break;
  3239. end;
  3240. Inc(p);
  3241. end;
  3242. end;
  3243. end;
  3244. type
  3245. P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
  3246. function IndexOf(
  3247. const AItem : P3lvlOBmp2TableItem;
  3248. const ATable : T3lvlOBmp2Table
  3249. ) : Integer;overload;
  3250. var
  3251. i : Integer;
  3252. p : P3lvlOBmp2TableItem;
  3253. begin
  3254. Result := -1;
  3255. if (Length(ATable) > 0) then begin
  3256. p := @ATable[0];
  3257. for i := 0 to Length(ATable) - 1 do begin
  3258. if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
  3259. Result := i;
  3260. Break;
  3261. end;
  3262. Inc(p);
  3263. end;
  3264. end;
  3265. end;
  3266. procedure MakeOBmpTables3Levels(
  3267. var AFirstTable : T3lvlOBmp1Table;
  3268. var ASecondTable : T3lvlOBmp2Table;
  3269. var AThirdTable : T3lvlOBmp3Table;
  3270. const ADataLineList : TDataLineRecArray
  3271. );
  3272. var
  3273. locLowByte0, locLowByte1, locHighByte : Word;
  3274. locTableItem2 : T3lvlOBmp2TableItem;
  3275. locTableItem3 : T3lvlOBmp3TableItem;
  3276. locCP : TUnicodeCodePoint;
  3277. i, locThirdActualLen : Integer;
  3278. begin
  3279. SetLength(AThirdTable,120);
  3280. locThirdActualLen := 0;
  3281. for locHighByte := 0 to 1023 do begin
  3282. FillChar(locTableItem2,SizeOf(locTableItem2),#0);
  3283. for locLowByte0 := 0 to 31 do begin
  3284. FillChar(locTableItem3,SizeOf(locTableItem3),#0);
  3285. for locLowByte1 := 0 to 31 do begin
  3286. locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
  3287. locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
  3288. end;
  3289. i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
  3290. if (i = -1) then begin
  3291. if (locThirdActualLen = Length(AThirdTable)) then
  3292. SetLength(AThirdTable,locThirdActualLen + 50);
  3293. i := locThirdActualLen;
  3294. AThirdTable[i] := locTableItem3;
  3295. Inc(locThirdActualLen);
  3296. end;
  3297. locTableItem2[locLowByte0] := i;
  3298. end;
  3299. i := IndexOf(@locTableItem2,ASecondTable);
  3300. if (i = -1) then begin
  3301. i := Length(ASecondTable);
  3302. SetLength(ASecondTable,(i + 1));
  3303. ASecondTable[i] := locTableItem2;
  3304. end;
  3305. AFirstTable[locHighByte] := i;
  3306. end;
  3307. SetLength(AThirdTable,locThirdActualLen);
  3308. end;
  3309. procedure GenerateOBmpTables(
  3310. ADest : TStream;
  3311. var AFirstTable : TOBmpFirstTable;
  3312. var ASecondTable : TOBmpSecondTable
  3313. );
  3314. procedure AddLine(const ALine : ansistring);
  3315. var
  3316. buffer : ansistring;
  3317. begin
  3318. buffer := ALine + sLineBreak;
  3319. ADest.Write(buffer[1],Length(buffer));
  3320. end;
  3321. var
  3322. i, j, c : Integer;
  3323. locLine : string;
  3324. begin
  3325. AddLine('const');
  3326. AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
  3327. locLine := '';
  3328. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3329. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3330. if (((i+1) mod 16) = 0) then begin
  3331. locLine := ' ' + locLine;
  3332. AddLine(locLine);
  3333. locLine := '';
  3334. end;
  3335. end;
  3336. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3337. locLine := ' ' + locLine;
  3338. AddLine(locLine);
  3339. AddLine(' );' + sLineBreak);
  3340. AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
  3341. c := High(ASecondTable);
  3342. for i := Low(ASecondTable) to c do begin
  3343. locLine := '';
  3344. for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
  3345. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  3346. if (((j+1) mod 16) = 0) then begin
  3347. if (i = c) and (j = High(TOBmpSecondTableItem)) then
  3348. Delete(locLine,Length(locLine),1);
  3349. locLine := ' ' + locLine;
  3350. AddLine(locLine);
  3351. locLine := '';
  3352. end;
  3353. end;
  3354. end;
  3355. AddLine(' );' + sLineBreak);
  3356. end;
  3357. //----------------------------------
  3358. procedure Generate3lvlOBmpTables(
  3359. ADest : TStream;
  3360. var AFirstTable : T3lvlOBmp1Table;
  3361. var ASecondTable : T3lvlOBmp2Table;
  3362. var AThirdTable : T3lvlOBmp3Table
  3363. );
  3364. procedure AddLine(const ALine : ansistring);
  3365. var
  3366. buffer : ansistring;
  3367. begin
  3368. buffer := ALine + sLineBreak;
  3369. ADest.Write(buffer[1],Length(buffer));
  3370. end;
  3371. var
  3372. i, j, c : Integer;
  3373. locLine : string;
  3374. begin
  3375. AddLine('const');
  3376. AddLine(' UCO_TABLE_1 : array[0..1023] of Word = (');
  3377. locLine := '';
  3378. for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
  3379. locLine := locLine + IntToStr(AFirstTable[i]) + ',';
  3380. if (((i+1) mod 16) = 0) then begin
  3381. locLine := ' ' + locLine;
  3382. AddLine(locLine);
  3383. locLine := '';
  3384. end;
  3385. end;
  3386. locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
  3387. locLine := ' ' + locLine;
  3388. AddLine(locLine);
  3389. AddLine(' );' + sLineBreak);
  3390. AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
  3391. c := High(ASecondTable);
  3392. for i := Low(ASecondTable) to c do begin
  3393. locLine := '(';
  3394. for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
  3395. locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
  3396. Delete(locLine,Length(locLine),1);
  3397. locLine := ' ' + locLine + ')';
  3398. if (i < c) then
  3399. locLine := locLine + ',';
  3400. AddLine(locLine);
  3401. end;
  3402. AddLine(' );' + sLineBreak);
  3403. AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
  3404. c := High(AThirdTable);
  3405. for i := Low(AThirdTable) to c do begin
  3406. locLine := '(';
  3407. for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
  3408. locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
  3409. Delete(locLine,Length(locLine),1);
  3410. locLine := ' ' + locLine + ')';
  3411. if (i < c) then
  3412. locLine := locLine + ',';
  3413. AddLine(locLine);
  3414. end;
  3415. AddLine(' );' + sLineBreak);
  3416. end;
  3417. function GetProp(
  3418. const AHighS,
  3419. ALowS : Word;
  3420. const AProps : TPropRecArray;
  3421. var AFirstTable : TOBmpFirstTable;
  3422. var ASecondTable : TOBmpSecondTable
  3423. ): PPropRec;
  3424. begin
  3425. Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
  3426. end;
  3427. function GetProp(
  3428. const AHighS,
  3429. ALowS : Word;
  3430. const AProps : TPropRecArray;
  3431. var AFirstTable : T3lvlOBmp1Table;
  3432. var ASecondTable : T3lvlOBmp2Table;
  3433. var AThirdTable : T3lvlOBmp3Table
  3434. ): PPropRec;
  3435. begin
  3436. Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
  3437. //Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
  3438. end;
  3439. { TUCA_PropItemContextTreeRec }
  3440. function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
  3441. begin
  3442. if (Size = 0) then
  3443. Result := nil
  3444. else
  3445. Result := PUCA_PropItemContextTreeNodeRec(
  3446. PtrUInt(
  3447. PtrUInt(@Self) + SizeOf(UInt24){Size}
  3448. )
  3449. );
  3450. end;
  3451. { TUCA_LineContextRec }
  3452. procedure TUCA_LineContextRec.Clear;
  3453. begin
  3454. Data := nil
  3455. end;
  3456. procedure TUCA_LineContextRec.Assign(ASource : TUCA_LineContextRec);
  3457. var
  3458. c, i : Integer;
  3459. begin
  3460. c := Length(ASource.Data);
  3461. SetLength(Self.Data,c);
  3462. for i := 0 to c-1 do
  3463. Self.Data[i].Assign(ASource.Data[i]);
  3464. end;
  3465. function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
  3466. begin
  3467. Result.Clear();
  3468. Result.Assign(Self);
  3469. end;
  3470. { TUCA_LineContextItemRec }
  3471. procedure TUCA_LineContextItemRec.Clear();
  3472. begin
  3473. CodePoints := nil;
  3474. Weights := nil;
  3475. end;
  3476. procedure TUCA_LineContextItemRec.Assign(ASource : TUCA_LineContextItemRec);
  3477. begin
  3478. Self.CodePoints := Copy(ASource.CodePoints);
  3479. Self.Weights := Copy(ASource.Weights);
  3480. end;
  3481. function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
  3482. begin
  3483. Result.Clear();
  3484. Result.Assign(Self);
  3485. end;
  3486. { TUCA_LineRec }
  3487. procedure TUCA_LineRec.Clear;
  3488. begin
  3489. CodePoints := nil;
  3490. Weights := nil;
  3491. Deleted := False;
  3492. Stored := False;
  3493. Context.Clear();
  3494. end;
  3495. procedure TUCA_LineRec.Assign(ASource : TUCA_LineRec);
  3496. begin
  3497. Self.CodePoints := Copy(ASource.CodePoints);
  3498. Self.Weights := Copy(ASource.Weights);
  3499. Self.Deleted := ASource.Deleted;
  3500. Self.Stored := ASource.Stored;
  3501. Self.Context.Assign(ASource.Context);
  3502. end;
  3503. function TUCA_LineRec.Clone : TUCA_LineRec;
  3504. begin
  3505. Result.Clear();
  3506. Result.Assign(Self);
  3507. end;
  3508. function TUCA_LineRec.HasContext() : Boolean;
  3509. begin
  3510. Result := (Length(Context.Data) > 0);
  3511. end;
  3512. { TPropRec }
  3513. function TPropRec.GetCategory: TUnicodeCategory;
  3514. begin
  3515. Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
  3516. end;
  3517. procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
  3518. var
  3519. b : Byte;
  3520. begin
  3521. b := Ord(AValue);
  3522. b := b shl 3;
  3523. CategoryData := CategoryData or b;
  3524. //CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
  3525. end;
  3526. function TPropRec.GetWhiteSpace: Boolean;
  3527. begin
  3528. Result := IsBitON(CategoryData,0);
  3529. end;
  3530. procedure TPropRec.SetWhiteSpace(AValue: Boolean);
  3531. begin
  3532. SetBit(CategoryData,0,AValue);
  3533. end;
  3534. function TPropRec.GetHangulSyllable: Boolean;
  3535. begin
  3536. Result := IsBitON(CategoryData,1);
  3537. end;
  3538. procedure TPropRec.SetHangulSyllable(AValue: Boolean);
  3539. begin
  3540. SetBit(CategoryData,1,AValue);
  3541. end;
  3542. { TUCA_PropItemRec }
  3543. function TUCA_PropItemRec.GetWeightSize : Word;
  3544. var
  3545. c : Integer;
  3546. begin
  3547. c := WeightLength;
  3548. if (c = 0) then
  3549. exit(0);
  3550. Result := c*SizeOf(TUCA_PropWeights);
  3551. if IsWeightCompress_1() then
  3552. Result := Result - 1;
  3553. if IsWeightCompress_2() then
  3554. Result := Result - 1;
  3555. end;
  3556. function TUCA_PropItemRec.HasCodePoint(): Boolean;
  3557. begin
  3558. Result := IsBitON(Flags,FLAG_CODEPOINT);
  3559. end;
  3560. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  3561. var
  3562. c : Integer;
  3563. p : PByte;
  3564. pd : PUCA_PropWeights;
  3565. begin
  3566. c := WeightLength;
  3567. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  3568. pd := ADest;
  3569. pd^.Weights[0] := PWord(p)^;
  3570. p := p + 2;
  3571. if not IsWeightCompress_1() then begin
  3572. pd^.Weights[1] := PWord(p)^;
  3573. p := p + 2;
  3574. end else begin
  3575. pd^.Weights[1] := p^;
  3576. p := p + 1;
  3577. end;
  3578. if not IsWeightCompress_2() then begin
  3579. pd^.Weights[2] := PWord(p)^;
  3580. p := p + 2;
  3581. end else begin
  3582. pd^.Weights[2] := p^;
  3583. p := p + 1;
  3584. end;
  3585. if (c > 1) then
  3586. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  3587. end;
  3588. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  3589. begin
  3590. Result := SizeOf(TUCA_PropItemRec);
  3591. if (WeightLength > 0) then begin
  3592. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  3593. if IsWeightCompress_1() then
  3594. Result := Result - 1;
  3595. if IsWeightCompress_2() then
  3596. Result := Result - 1;
  3597. end;
  3598. if HasCodePoint() then
  3599. Result := Result + SizeOf(UInt24);
  3600. if Contextual then
  3601. Result := Result + Cardinal(GetContext()^.Size);
  3602. end;
  3603. procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
  3604. begin
  3605. SetBit(Flags,FLAG_CONTEXTUAL,AValue);
  3606. end;
  3607. function TUCA_PropItemRec.GetContextual : Boolean;
  3608. begin
  3609. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  3610. end;
  3611. function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
  3612. var
  3613. p : PtrUInt;
  3614. begin
  3615. if not Contextual then
  3616. exit(nil);
  3617. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  3618. if IsBitON(Flags,FLAG_CODEPOINT) then
  3619. p := p + SizeOf(UInt24);
  3620. Result := PUCA_PropItemContextTreeRec(p);
  3621. end;
  3622. procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
  3623. begin
  3624. SetBit(Flags,FLAG_DELETION,AValue);
  3625. end;
  3626. function TUCA_PropItemRec.IsDeleted: Boolean;
  3627. begin
  3628. Result := IsBitON(Flags,FLAG_DELETION);
  3629. end;
  3630. function TUCA_PropItemRec.IsValid() : Boolean;
  3631. begin
  3632. Result := IsBitON(Flags,FLAG_VALID);
  3633. end;
  3634. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  3635. begin
  3636. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  3637. end;
  3638. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  3639. begin
  3640. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  3641. end;
  3642. function TUCA_PropItemRec.GetCodePoint: UInt24;
  3643. begin
  3644. if HasCodePoint() then begin
  3645. if Contextual then
  3646. Result := PUInt24(
  3647. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  3648. Cardinal(GetContext()^.Size)
  3649. )^
  3650. else
  3651. Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
  3652. end else begin
  3653. raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  3654. end
  3655. end;
  3656. function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
  3657. var
  3658. a, b : PUCA_LineContextItemRec;
  3659. i, hb : Integer;
  3660. begin
  3661. if (Item1 = Item2) then
  3662. exit(0);
  3663. if (Item1 = nil) then
  3664. exit(-1);
  3665. if (Item2 = nil) then
  3666. exit(1);
  3667. a := Item1;
  3668. b := Item2;
  3669. if (a^.CodePoints = b^.CodePoints) then
  3670. exit(0);
  3671. Result := 1;
  3672. hb := Length(b^.CodePoints) - 1;
  3673. for i := 0 to Length(a^.CodePoints) - 1 do begin
  3674. if (i > hb) then
  3675. exit;
  3676. if (a^.CodePoints[i] < b^.CodePoints[i]) then
  3677. exit(-1);
  3678. if (a^.CodePoints[i] > b^.CodePoints[i]) then
  3679. exit(1);
  3680. end;
  3681. if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
  3682. exit(0);
  3683. exit(-1);
  3684. end;
  3685. function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
  3686. var
  3687. r : TAVLTree;
  3688. i : Integer;
  3689. begin
  3690. r := TAVLTree.Create(@avl_CompareCodePoints);
  3691. try
  3692. for i := 0 to Length(AContext^.Data) - 1 do
  3693. r.Add(@AContext^.Data[i]);
  3694. Result := r;
  3695. except
  3696. FreeAndNil(r);
  3697. raise;
  3698. end;
  3699. end;
  3700. function ConstructContextTree(
  3701. const AContext : PUCA_LineContextRec;
  3702. var ADestBuffer;
  3703. const ADestBufferLength : Cardinal
  3704. ) : PUCA_PropItemContextTreeRec;
  3705. function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
  3706. var
  3707. kitem : PUCA_LineContextItemRec;
  3708. begin
  3709. if (AItem = nil) then
  3710. exit(0);
  3711. kitem := AItem.Data;
  3712. Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
  3713. SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
  3714. SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
  3715. (Length(kitem^.CodePoints)*SizeOf(UInt24)) +
  3716. SizeOf(PUCA_PropItemContextRec^.WeightCount) +
  3717. (Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
  3718. end;
  3719. function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
  3720. begin
  3721. if (AItem = nil) then
  3722. exit(0);
  3723. Result := CalcItemOnlySize(AItem);
  3724. if (AItem.Left <> nil) then
  3725. Result := Result + CalcItemSize(AItem.Left);
  3726. if (AItem.Right <> nil) then
  3727. Result := Result + CalcItemSize(AItem.Right);
  3728. end;
  3729. function CalcSize(AData : TAVLTree) : Cardinal;
  3730. begin
  3731. Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
  3732. end;
  3733. function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
  3734. var
  3735. k : Integer;
  3736. kitem : PUCA_LineContextItemRec;
  3737. kpcp : PUInt24;
  3738. kpw : PUCA_PropWeights;
  3739. pextra : PtrUInt;
  3740. pnext : PUCA_PropItemContextTreeNodeRec;
  3741. begin
  3742. kitem := ASource.Data;
  3743. ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
  3744. ADest^.Data.WeightCount := Length(kitem^.Weights);
  3745. pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
  3746. SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
  3747. if (ADest^.Data.CodePointCount > 0) then begin
  3748. kpcp := PUInt24(pextra);
  3749. for k := 0 to ADest^.Data.CodePointCount - 1 do begin
  3750. kpcp^ := kitem^.CodePoints[k];
  3751. Inc(kpcp);
  3752. end;
  3753. end;
  3754. if (ADest^.Data.WeightCount > 0) then begin
  3755. kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
  3756. for k := 0 to ADest^.Data.WeightCount - 1 do begin
  3757. kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
  3758. kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
  3759. kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
  3760. Inc(kpw);
  3761. end;
  3762. end;
  3763. Result := CalcItemOnlySize(ASource);
  3764. if (ASource.Left <> nil) then begin
  3765. pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
  3766. ADest^.Left := Result;
  3767. Result := Result + ConstructItem(ASource.Left,pnext);
  3768. end else begin
  3769. ADest^.Left := 0;
  3770. end;
  3771. if (ASource.Right <> nil) then begin
  3772. pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
  3773. ADest^.Right := Result;
  3774. Result := Result + ConstructItem(ASource.Right,pnext);
  3775. end else begin
  3776. ADest^.Right := 0;
  3777. end;
  3778. end;
  3779. var
  3780. c : PtrUInt;
  3781. r : PUCA_PropItemContextTreeRec;
  3782. p : PUCA_PropItemContextTreeNodeRec;
  3783. tempTree : TAVLTree;
  3784. begin
  3785. tempTree := ConstructAvlContextTree(AContext);
  3786. try
  3787. c := CalcSize(tempTree);
  3788. if (ADestBufferLength > 0) and (c > ADestBufferLength) then
  3789. raise Exception.Create(SInsufficientMemoryBuffer);
  3790. r := @ADestBuffer;
  3791. r^.Size := c;
  3792. p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
  3793. ConstructItem(tempTree.Root,p);
  3794. finally
  3795. tempTree.Free();
  3796. end;
  3797. Result := r;
  3798. end;
  3799. procedure ReverseBytes(var AData; const ALength : Integer);
  3800. var
  3801. i,j : PtrInt;
  3802. c : Byte;
  3803. p : PByte;
  3804. begin
  3805. if (ALength = 1) then
  3806. exit;
  3807. p := @AData;
  3808. j := ALength div 2;
  3809. for i := 0 to Pred(j) do begin
  3810. c := p[i];
  3811. p[i] := p[(ALength - 1 ) - i];
  3812. p[(ALength - 1 ) - i] := c;
  3813. end;
  3814. end;
  3815. procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
  3816. var
  3817. p : PByte;
  3818. i : PtrInt;
  3819. begin
  3820. if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
  3821. p := @AValue;
  3822. for i := 0 to Pred(AArrayLength) do begin
  3823. ReverseBytes(p^,AItemSize);
  3824. Inc(p,AItemSize);
  3825. end;
  3826. end;
  3827. end;
  3828. procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
  3829. var
  3830. k : PtrUInt;
  3831. p_s, p_d : PByte;
  3832. begin
  3833. d^.Left := s^.Left;
  3834. ReverseBytes(d^.Left,SizeOf(d^.Left));
  3835. d^.Right := s^.Right;
  3836. ReverseBytes(d^.Right,SizeOf(d^.Right));
  3837. d^.Data.CodePointCount := s^.Data.CodePointCount;
  3838. ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
  3839. d^.Data.WeightCount := s^.Data.WeightCount;
  3840. ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
  3841. k := SizeOf(TUCA_PropItemContextTreeNodeRec);
  3842. p_s := PByte(PtrUInt(s) + k);
  3843. p_d := PByte(PtrUInt(d) + k);
  3844. k := (s^.Data.CodePointCount*SizeOf(UInt24));
  3845. Move(p_s^,p_d^, k);
  3846. ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
  3847. p_s := PByte(PtrUInt(p_s) + k);
  3848. p_d := PByte(PtrUInt(p_d) + k);
  3849. k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
  3850. Move(p_s^,p_d^,k);
  3851. ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
  3852. if (s^.Left > 0) then
  3853. ReverseContextNodeFromNativeEndian(
  3854. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
  3855. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
  3856. );
  3857. if (s^.Right > 0) then
  3858. ReverseContextNodeFromNativeEndian(
  3859. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
  3860. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
  3861. );
  3862. end;
  3863. procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
  3864. var
  3865. k : PtrUInt;
  3866. begin
  3867. d^.Size := s^.Size;
  3868. ReverseBytes(d^.Size,SizeOf(d^.Size));
  3869. if (s^.Size = 0) then
  3870. exit;
  3871. k := SizeOf(s^.Size);
  3872. ReverseContextNodeFromNativeEndian(
  3873. PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
  3874. PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
  3875. );
  3876. end;
  3877. procedure ReverseFromNativeEndian(
  3878. const AData : PUCA_PropItemRec;
  3879. const ADataLen : Cardinal;
  3880. const ADest : PUCA_PropItemRec
  3881. );
  3882. var
  3883. s, d : PUCA_PropItemRec;
  3884. sCtx, dCtx : PUCA_PropItemContextTreeRec;
  3885. dataEnd : PtrUInt;
  3886. k, i : PtrUInt;
  3887. p_s, p_d : PByte;
  3888. pw_s, pw_d : PUCA_PropWeights;
  3889. begin
  3890. dataEnd := PtrUInt(AData) + ADataLen;
  3891. s := AData;
  3892. d := ADest;
  3893. while True do begin
  3894. d^.WeightLength := s^.WeightLength;
  3895. ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
  3896. d^.ChildCount := s^.ChildCount;
  3897. ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
  3898. d^.Size := s^.Size;
  3899. ReverseBytes(d^.Size,SizeOf(d^.Size));
  3900. d^.Flags := s^.Flags;
  3901. ReverseBytes(d^.Flags,SizeOf(d^.Flags));
  3902. if s^.Contextual then begin
  3903. k := SizeOf(TUCA_PropItemRec);
  3904. if s^.HasCodePoint() then
  3905. k := k + SizeOf(UInt24);
  3906. sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
  3907. dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
  3908. ReverseContextFromNativeEndian(sCtx,dCtx);
  3909. end;
  3910. if s^.HasCodePoint() then begin
  3911. if s^.Contextual then
  3912. k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
  3913. else
  3914. k := s^.GetSelfOnlySize() - SizeOf(UInt24);
  3915. p_s := PByte(PtrUInt(s) + k);
  3916. p_d := PByte(PtrUInt(d) + k);
  3917. PUInt24(p_d)^ := PUInt24(p_s)^;
  3918. ReverseBytes(p_d^,SizeOf(UInt24));
  3919. end;
  3920. if (s^.WeightLength > 0) then begin
  3921. k := SizeOf(TUCA_PropItemRec);
  3922. p_s := PByte(PtrUInt(s) + k);
  3923. p_d := PByte(PtrUInt(d) + k);
  3924. k := SizeOf(Word);
  3925. PWord(p_d)^ := PWord(p_s)^;
  3926. ReverseBytes(p_d^,k);
  3927. p_s := PByte(PtrUInt(p_s) + k);
  3928. p_d := PByte(PtrUInt(p_d) + k);
  3929. if s^.IsWeightCompress_1() then begin
  3930. k := SizeOf(Byte);
  3931. PByte(p_d)^ := PByte(p_s)^;
  3932. end else begin
  3933. k := SizeOf(Word);
  3934. PWord(p_d)^ := PWord(p_s)^;
  3935. end;
  3936. ReverseBytes(p_d^,k);
  3937. p_s := PByte(PtrUInt(p_s) + k);
  3938. p_d := PByte(PtrUInt(p_d) + k);
  3939. if s^.IsWeightCompress_2() then begin
  3940. k := SizeOf(Byte);
  3941. PByte(p_d)^ := PByte(p_s)^;
  3942. end else begin
  3943. k := SizeOf(Word);
  3944. PWord(p_d)^ := PWord(p_s)^;
  3945. end;
  3946. ReverseBytes(p_d^,k);
  3947. if (s^.WeightLength > 1) then begin
  3948. pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
  3949. pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
  3950. for i := 1 to s^.WeightLength - 1 do begin
  3951. pw_d^.Weights[0] := pw_s^.Weights[0];
  3952. pw_d^.Weights[1] := pw_s^.Weights[1];
  3953. pw_d^.Weights[2] := pw_s^.Weights[2];
  3954. ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
  3955. Inc(pw_s);
  3956. Inc(pw_d);
  3957. end;
  3958. end;
  3959. end;
  3960. k := s^.GetSelfOnlySize();
  3961. s := PUCA_PropItemRec(PtrUInt(s)+k);
  3962. d := PUCA_PropItemRec(PtrUInt(d)+k);
  3963. if (PtrUInt(s) >= dataEnd) then
  3964. Break;
  3965. end;
  3966. if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
  3967. raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
  3968. end;
  3969. //------------------------------------------------------------------------------
  3970. procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
  3971. var
  3972. k : PtrUInt;
  3973. p_s, p_d : PByte;
  3974. begin
  3975. d^.Left := s^.Left;
  3976. ReverseBytes(d^.Left,SizeOf(d^.Left));
  3977. d^.Right := s^.Right;
  3978. ReverseBytes(d^.Right,SizeOf(d^.Right));
  3979. d^.Data.CodePointCount := s^.Data.CodePointCount;
  3980. ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
  3981. d^.Data.WeightCount := s^.Data.WeightCount;
  3982. ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
  3983. k := SizeOf(TUCA_PropItemContextTreeNodeRec);
  3984. p_s := PByte(PtrUInt(s) + k);
  3985. p_d := PByte(PtrUInt(d) + k);
  3986. k := (d^.Data.CodePointCount*SizeOf(UInt24));
  3987. Move(p_s^,p_d^, k);
  3988. ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
  3989. p_s := PByte(PtrUInt(p_s) + k);
  3990. p_d := PByte(PtrUInt(p_d) + k);
  3991. k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
  3992. Move(p_s^,p_d^,k);
  3993. ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
  3994. if (d^.Left > 0) then
  3995. ReverseContextNodeToNativeEndian(
  3996. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
  3997. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
  3998. );
  3999. if (d^.Right > 0) then
  4000. ReverseContextNodeToNativeEndian(
  4001. PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
  4002. PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
  4003. );
  4004. end;
  4005. procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
  4006. var
  4007. k : PtrUInt;
  4008. begin
  4009. d^.Size := s^.Size;
  4010. ReverseBytes(d^.Size,SizeOf(d^.Size));
  4011. if (s^.Size = 0) then
  4012. exit;
  4013. k := SizeOf(s^.Size);
  4014. ReverseContextNodeToNativeEndian(
  4015. PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
  4016. PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
  4017. );
  4018. end;
  4019. procedure ReverseToNativeEndian(
  4020. const AData : PUCA_PropItemRec;
  4021. const ADataLen : Cardinal;
  4022. const ADest : PUCA_PropItemRec
  4023. );
  4024. var
  4025. s, d : PUCA_PropItemRec;
  4026. sCtx, dCtx : PUCA_PropItemContextTreeRec;
  4027. dataEnd : PtrUInt;
  4028. k, i : PtrUInt;
  4029. p_s, p_d : PByte;
  4030. pw_s, pw_d : PUCA_PropWeights;
  4031. begin
  4032. dataEnd := PtrUInt(AData) + ADataLen;
  4033. s := AData;
  4034. d := ADest;
  4035. while True do begin
  4036. d^.WeightLength := s^.WeightLength;
  4037. ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
  4038. d^.ChildCount := s^.ChildCount;
  4039. ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
  4040. d^.Size := s^.Size;
  4041. ReverseBytes(d^.Size,SizeOf(d^.Size));
  4042. d^.Flags := s^.Flags;
  4043. ReverseBytes(d^.Flags,SizeOf(d^.Flags));
  4044. if d^.Contextual then begin
  4045. k := SizeOf(TUCA_PropItemRec);
  4046. if d^.HasCodePoint() then
  4047. k := k + SizeOf(UInt24);
  4048. sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
  4049. dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
  4050. ReverseContextToNativeEndian(sCtx,dCtx);
  4051. end;
  4052. if d^.HasCodePoint() then begin
  4053. if d^.Contextual then
  4054. k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
  4055. else
  4056. k := d^.GetSelfOnlySize() - SizeOf(UInt24);
  4057. p_s := PByte(PtrUInt(s) + k);
  4058. p_d := PByte(PtrUInt(d) + k);
  4059. PUInt24(p_d)^ := PUInt24(p_s)^;
  4060. ReverseBytes(p_d^,SizeOf(UInt24));
  4061. end;
  4062. if (d^.WeightLength > 0) then begin
  4063. k := SizeOf(TUCA_PropItemRec);
  4064. p_s := PByte(PtrUInt(s) + k);
  4065. p_d := PByte(PtrUInt(d) + k);
  4066. k := SizeOf(Word);
  4067. PWord(p_d)^ := PWord(p_s)^;
  4068. ReverseBytes(p_d^,k);
  4069. p_s := PByte(PtrUInt(p_s) + k);
  4070. p_d := PByte(PtrUInt(p_d) + k);
  4071. if d^.IsWeightCompress_1() then begin
  4072. k := SizeOf(Byte);
  4073. PByte(p_d)^ := PByte(p_s)^;
  4074. end else begin
  4075. k := SizeOf(Word);
  4076. PWord(p_d)^ := PWord(p_s)^;
  4077. end;
  4078. ReverseBytes(p_d^,k);
  4079. p_s := PByte(PtrUInt(p_s) + k);
  4080. p_d := PByte(PtrUInt(p_d) + k);
  4081. if d^.IsWeightCompress_2() then begin
  4082. k := SizeOf(Byte);
  4083. PByte(p_d)^ := PByte(p_s)^;
  4084. end else begin
  4085. k := SizeOf(Word);
  4086. PWord(p_d)^ := PWord(p_s)^;
  4087. end;
  4088. ReverseBytes(p_d^,k);
  4089. if (d^.WeightLength > 1) then begin
  4090. pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
  4091. pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
  4092. for i := 1 to d^.WeightLength - 1 do begin
  4093. pw_d^.Weights[0] := pw_s^.Weights[0];
  4094. pw_d^.Weights[1] := pw_s^.Weights[1];
  4095. pw_d^.Weights[2] := pw_s^.Weights[2];
  4096. ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
  4097. Inc(pw_s);
  4098. Inc(pw_d);
  4099. end;
  4100. end;
  4101. end;
  4102. k := d^.GetSelfOnlySize();
  4103. s := PUCA_PropItemRec(PtrUInt(s)+k);
  4104. d := PUCA_PropItemRec(PtrUInt(d)+k);
  4105. if (PtrUInt(s) >= dataEnd) then
  4106. Break;
  4107. end;
  4108. if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
  4109. raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
  4110. end;
  4111. procedure Check(const ACondition : Boolean; const AMsg : string);overload;
  4112. begin
  4113. if not ACondition then
  4114. raise Exception.Create(AMsg);
  4115. end;
  4116. procedure Check(
  4117. const ACondition : Boolean;
  4118. const AFormatMsg : string;
  4119. const AArgs : array of const
  4120. );overload;
  4121. begin
  4122. Check(ACondition,Format(AFormatMsg,AArgs));
  4123. end;
  4124. procedure Check(const ACondition : Boolean);overload;
  4125. begin
  4126. Check(ACondition,'Check failed.')
  4127. end;
  4128. procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
  4129. var
  4130. i : Integer;
  4131. begin
  4132. if (ALength > 0) then begin
  4133. for i := 0 to ALength - 1 do begin
  4134. Check(a[i].Weights[0]=b[i].Weights[0]);
  4135. Check(a[i].Weights[1]=b[i].Weights[1]);
  4136. Check(a[i].Weights[2]=b[i].Weights[2]);
  4137. end;
  4138. end;
  4139. end;
  4140. procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
  4141. var
  4142. i : Integer;
  4143. begin
  4144. if (ALength > 0) then begin
  4145. for i := 0 to ALength - 1 do
  4146. Check(a[i]=b[i]);
  4147. end;
  4148. end;
  4149. procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
  4150. var
  4151. a, b : PUCA_PropItemContextTreeNodeRec;
  4152. k : Cardinal;
  4153. begin
  4154. if (AProp1=nil) then begin
  4155. Check(AProp2=nil);
  4156. exit;
  4157. end;
  4158. a := AProp1;
  4159. b := AProp2;
  4160. Check(a^.Left=b^.Left);
  4161. Check(a^.Right=b^.Right);
  4162. Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
  4163. Check(a^.Data.WeightCount=b^.Data.WeightCount);
  4164. k := SizeOf(a^.Data);
  4165. CompareCodePoints(
  4166. PUInt24(PtrUInt(a)+k),
  4167. PUInt24(PtrUInt(b)+k),
  4168. a^.Data.CodePointCount
  4169. );
  4170. k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
  4171. CompareWeights(
  4172. PUCA_PropWeights(PtrUInt(a)+k),
  4173. PUCA_PropWeights(PtrUInt(b)+k),
  4174. a^.Data.WeightCount
  4175. );
  4176. if (a^.Left > 0) then begin
  4177. k := a^.Left;
  4178. CompareContextNode(
  4179. PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
  4180. PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
  4181. );
  4182. end;
  4183. if (a^.Right > 0) then begin
  4184. k := a^.Right;
  4185. CompareContextNode(
  4186. PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
  4187. PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
  4188. );
  4189. end;
  4190. end;
  4191. procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
  4192. var
  4193. a, b : PUCA_PropItemContextTreeNodeRec;
  4194. k : Integer;
  4195. begin
  4196. if (AProp1=nil) then begin
  4197. Check(AProp2=nil);
  4198. exit;
  4199. end;
  4200. Check(AProp1^.Size=AProp2^.Size);
  4201. k := Cardinal(AProp1^.Size);
  4202. a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
  4203. b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
  4204. CompareContextNode(a,b);
  4205. end;
  4206. procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
  4207. var
  4208. a, b, pend : PUCA_PropItemRec;
  4209. wa, wb : array of TUCA_PropWeights;
  4210. k : Integer;
  4211. begin
  4212. if (ADataLen <= 0) then
  4213. exit;
  4214. a := PUCA_PropItemRec(AProp1);
  4215. b := PUCA_PropItemRec(AProp2);
  4216. pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
  4217. while (a<pend) do begin
  4218. Check(a^.WeightLength=b^.WeightLength);
  4219. Check(a^.ChildCount=b^.ChildCount);
  4220. Check(a^.Size=b^.Size);
  4221. Check(a^.Flags=b^.Flags);
  4222. if a^.HasCodePoint() then
  4223. Check(a^.CodePoint = b^.CodePoint);
  4224. if (a^.WeightLength > 0) then begin
  4225. k := a^.WeightLength;
  4226. SetLength(wa,k);
  4227. SetLength(wb,k);
  4228. a^.GetWeightArray(@wa[0]);
  4229. b^.GetWeightArray(@wb[0]);
  4230. CompareWeights(@wa[0],@wb[0],k);
  4231. end;
  4232. if a^.Contextual then
  4233. CompareContext(a^.GetContext(),b^.GetContext());
  4234. Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
  4235. k := a^.GetSelfOnlySize();
  4236. a := PUCA_PropItemRec(PtrUInt(a)+k);
  4237. b := PUCA_PropItemRec(PtrUInt(b)+k);
  4238. end;
  4239. end;
  4240. initialization
  4241. FS := DefaultFormatSettings;
  4242. FS.DecimalSeparator := '.';
  4243. end.