unicodedata.pas 94 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. unit unicodedata;
  17. {$mode delphi}
  18. {$H+}
  19. {$PACKENUM 1}
  20. {$SCOPEDENUMS ON}
  21. {$pointermath on}
  22. {$define USE_INLINE}
  23. {$warn 4056 off} //Conversion between ordinals and pointers is not portable
  24. { $define uni_debug}
  25. interface
  26. const
  27. MAX_WORD = High(Word);
  28. LOW_SURROGATE_BEGIN = Word($DC00);
  29. LOW_SURROGATE_END = Word($DFFF);
  30. HIGH_SURROGATE_BEGIN = Word($D800);
  31. HIGH_SURROGATE_END = Word($DBFF);
  32. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  33. UCS4_HALF_BASE = LongWord($10000);
  34. UCS4_HALF_MASK = Word($3FF);
  35. MAX_LEGAL_UTF32 = $10FFFF;
  36. const
  37. // Unicode General Category
  38. UGC_UppercaseLetter = 0;
  39. UGC_LowercaseLetter = 1;
  40. UGC_TitlecaseLetter = 2;
  41. UGC_ModifierLetter = 3;
  42. UGC_OtherLetter = 4;
  43. UGC_NonSpacingMark = 5;
  44. UGC_CombiningMark = 6;
  45. UGC_EnclosingMark = 7;
  46. UGC_DecimalNumber = 8;
  47. UGC_LetterNumber = 9;
  48. UGC_OtherNumber = 10;
  49. UGC_ConnectPunctuation = 11;
  50. UGC_DashPunctuation = 12;
  51. UGC_OpenPunctuation = 13;
  52. UGC_ClosePunctuation = 14;
  53. UGC_InitialPunctuation = 15;
  54. UGC_FinalPunctuation = 16;
  55. UGC_OtherPunctuation = 17;
  56. UGC_MathSymbol = 18;
  57. UGC_CurrencySymbol = 19;
  58. UGC_ModifierSymbol = 20;
  59. UGC_OtherSymbol = 21;
  60. UGC_SpaceSeparator = 22;
  61. UGC_LineSeparator = 23;
  62. UGC_ParagraphSeparator = 24;
  63. UGC_Control = 25;
  64. UGC_Format = 26;
  65. UGC_Surrogate = 27;
  66. UGC_PrivateUse = 28;
  67. UGC_Unassigned = 29;
  68. type
  69. TUInt24Rec = packed record
  70. public
  71. {$ifdef FPC_LITTLE_ENDIAN}
  72. byte0, byte1, byte2 : Byte;
  73. {$else FPC_LITTLE_ENDIAN}
  74. byte2, byte1, byte0 : Byte;
  75. {$endif FPC_LITTLE_ENDIAN}
  76. public
  77. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  78. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  79. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  80. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  81. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  82. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  83. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  84. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  85. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  86. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  87. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  88. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  89. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  90. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  91. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  92. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  93. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  94. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  95. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  96. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  97. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  98. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  99. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  100. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  101. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  102. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  103. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  104. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  105. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  106. end;
  107. UInt24 = TUInt24Rec;
  108. PUInt24 = ^UInt24;
  109. const
  110. ZERO_UINT24 : UInt24 =
  111. {$ifdef FPC_LITTLE_ENDIAN}
  112. (byte0 : 0; byte1 : 0; byte2 : 0;);
  113. {$else FPC_LITTLE_ENDIAN}
  114. (byte2 : 0; byte1 : 0; byte0 : 0;);
  115. {$endif FPC_LITTLE_ENDIAN}
  116. type
  117. PUC_Prop = ^TUC_Prop;
  118. { TUC_Prop }
  119. TUC_Prop = packed record
  120. private
  121. function GetCategory : Byte;inline;
  122. procedure SetCategory(AValue : Byte);
  123. function GetWhiteSpace : Boolean;inline;
  124. procedure SetWhiteSpace(AValue : Boolean);
  125. function GetHangulSyllable : Boolean;inline;
  126. procedure SetHangulSyllable(AValue : Boolean);
  127. function GetNumericValue: Double;inline;
  128. public
  129. CategoryData : Byte;
  130. public
  131. CCC : Byte;
  132. NumericIndex : Byte;
  133. SimpleUpperCase : UInt24;
  134. SimpleLowerCase : UInt24;
  135. DecompositionID : SmallInt;
  136. public
  137. property Category : Byte read GetCategory write SetCategory;
  138. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  139. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  140. property NumericValue : Double read GetNumericValue;
  141. end;
  142. type
  143. TUCA_PropWeights = packed record
  144. Weights : array[0..2] of Word;
  145. end;
  146. PUCA_PropWeights = ^TUCA_PropWeights;
  147. TUCA_PropItemContextRec = packed record
  148. public
  149. CodePointCount : Byte;
  150. WeightCount : Byte;
  151. public
  152. function GetCodePoints() : PUInt24;inline;
  153. function GetWeights() : PUCA_PropWeights;inline;
  154. end;
  155. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  156. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  157. TUCA_PropItemContextTreeNodeRec = packed record
  158. public
  159. Left : Word;
  160. Right : Word;
  161. Data : TUCA_PropItemContextRec;
  162. public
  163. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  164. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  165. end;
  166. { TUCA_PropItemContextTreeRec }
  167. TUCA_PropItemContextTreeRec = packed record
  168. public
  169. Size : UInt24;
  170. public
  171. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  172. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  173. function Find(
  174. const AChars : PUInt24;
  175. const ACharCount : Integer;
  176. out ANode : PUCA_PropItemContextTreeNodeRec
  177. ) : Boolean;
  178. end;
  179. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  180. { TUCA_PropItemRec }
  181. TUCA_PropItemRec = packed record
  182. private
  183. const FLAG_VALID = 0;
  184. const FLAG_CODEPOINT = 1;
  185. const FLAG_CONTEXTUAL = 2;
  186. const FLAG_DELETION = 3;
  187. const FLAG_COMPRESS_WEIGHT_1 = 6;
  188. const FLAG_COMPRESS_WEIGHT_2 = 7;
  189. private
  190. function GetCodePoint() : UInt24;inline;
  191. public
  192. WeightLength : Byte;
  193. ChildCount : Byte;
  194. Size : Word;
  195. Flags : Byte;
  196. public
  197. function HasCodePoint() : Boolean;inline;
  198. property CodePoint : UInt24 read GetCodePoint;
  199. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  200. function IsValid() : Boolean;inline;
  201. //function GetWeightArray() : PUCA_PropWeights;inline;
  202. procedure GetWeightArray(ADest : PUCA_PropWeights);
  203. function GetSelfOnlySize() : Cardinal;inline;
  204. function GetContextual() : Boolean;inline;
  205. property Contextual : Boolean read GetContextual;
  206. function GetContext() : PUCA_PropItemContextTreeRec;
  207. function IsDeleted() : Boolean;inline;
  208. function IsWeightCompress_1() : Boolean;inline;
  209. function IsWeightCompress_2() : Boolean;inline;
  210. end;
  211. PUCA_PropItemRec = ^TUCA_PropItemRec;
  212. TUCA_VariableKind = (
  213. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  214. ucaIgnoreSP // This one is not implemented !
  215. );
  216. TCollationName = string[128];
  217. PUCA_DataBook = ^TUCA_DataBook;
  218. TUCA_DataBook = packed record
  219. public
  220. Base : PUCA_DataBook;
  221. Version : TCollationName;
  222. CollationName : TCollationName;
  223. VariableWeight : TUCA_VariableKind;
  224. Backwards : array[0..3] of Boolean;
  225. BMP_Table1 : PByte;
  226. BMP_Table2 : PUInt24;
  227. OBMP_Table1 : PWord;
  228. OBMP_Table2 : PUInt24;
  229. PropCount : Integer;
  230. Props : PUCA_PropItemRec;
  231. VariableLowLimit : Word;
  232. VariableHighLimit : Word;
  233. Dynamic : Boolean;
  234. public
  235. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  236. end;
  237. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  238. TCollationFields = set of TCollationField;
  239. const
  240. ROOT_COLLATION_NAME = 'DUCET';
  241. ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
  242. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  243. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  244. function UnicodeIsSurrogatePair(
  245. const AHighSurrogate,
  246. ALowSurrogate : UnicodeChar
  247. ) : Boolean;inline;
  248. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  249. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  250. function UnicodeToUpper(
  251. const AString : UnicodeString;
  252. const AIgnoreInvalidSequence : Boolean;
  253. out AResultString : UnicodeString
  254. ) : Integer;
  255. function UnicodeToLower(
  256. const AString : UnicodeString;
  257. const AIgnoreInvalidSequence : Boolean;
  258. out AResultString : UnicodeString
  259. ) : Integer;
  260. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  261. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  262. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  263. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
  264. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
  265. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  266. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  267. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  268. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  269. type
  270. TUCASortKeyItem = Word;
  271. TUCASortKey = array of TUCASortKeyItem;
  272. function ComputeSortKey(
  273. const AString : UnicodeString;
  274. const ACollation : PUCA_DataBook
  275. ) : TUCASortKey;inline;overload;
  276. function ComputeSortKey(
  277. const AStr : PUnicodeChar;
  278. const ALength : SizeInt;
  279. const ACollation : PUCA_DataBook
  280. ) : TUCASortKey;overload;
  281. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  282. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  283. function IncrementalCompareString(
  284. const AStrA : PUnicodeChar;
  285. const ALengthA : SizeInt;
  286. const AStrB : PUnicodeChar;
  287. const ALengthB : SizeInt;
  288. const ACollation : PUCA_DataBook
  289. ) : Integer;overload;
  290. function IncrementalCompareString(
  291. const AStrA,
  292. AStrB : UnicodeString;
  293. const ACollation : PUCA_DataBook
  294. ) : Integer;inline;overload;
  295. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
  296. function RegisterCollation(
  297. const ADirectory,
  298. ALanguage : string
  299. ) : Boolean;overload;
  300. function UnregisterCollation(const AName : ansistring): Boolean;
  301. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  302. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  303. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  304. function GetCollationCount() : Integer;
  305. procedure PrepareCollation(
  306. ACollation : PUCA_DataBook;
  307. const ABaseName : ansistring;
  308. const AChangedFields : TCollationFields
  309. );
  310. function LoadCollation(
  311. const AData : Pointer;
  312. const ADataLength : Integer
  313. ) : PUCA_DataBook;overload;
  314. function LoadCollation(const AFileName : string) : PUCA_DataBook;overload;
  315. function LoadCollation(
  316. const ADirectory,
  317. ALanguage : string
  318. ) : PUCA_DataBook;overload;
  319. procedure FreeCollation(AItem : PUCA_DataBook);
  320. type
  321. TEndianKind = (Little, Big);
  322. const
  323. ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
  324. {$IFDEF ENDIAN_LITTLE}
  325. ENDIAN_NATIVE = TEndianKind.Little;
  326. ENDIAN_NON_NATIVE = TEndianKind.Big;
  327. {$ENDIF ENDIAN_LITTLE}
  328. {$IFDEF ENDIAN_BIG}
  329. ENDIAN_NATIVE = TEndianKind.Big;
  330. ENDIAN_NON_NATIVE = TEndianKind.Little;
  331. {$ENDIF ENDIAN_BIG}
  332. resourcestring
  333. SCollationNotFound = 'Collation not found : "%s".';
  334. implementation
  335. uses
  336. unicodenumtable;
  337. type
  338. TCardinalRec = packed record
  339. {$ifdef FPC_LITTLE_ENDIAN}
  340. byte0, byte1, byte2, byte3 : Byte;
  341. {$else FPC_LITTLE_ENDIAN}
  342. byte3, byte2, byte1, byte0 : Byte;
  343. {$endif FPC_LITTLE_ENDIAN}
  344. end;
  345. TWordRec = packed record
  346. {$ifdef FPC_LITTLE_ENDIAN}
  347. byte0, byte1 : Byte;
  348. {$else FPC_LITTLE_ENDIAN}
  349. byte1, byte0 : Byte;
  350. {$endif FPC_LITTLE_ENDIAN}
  351. end;
  352. { TUInt24Rec }
  353. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  354. begin
  355. TCardinalRec(Result).byte0 := a.byte0;
  356. TCardinalRec(Result).byte1 := a.byte1;
  357. TCardinalRec(Result).byte2 := a.byte2;
  358. TCardinalRec(Result).byte3 := 0;
  359. end;
  360. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  361. begin
  362. Result := Cardinal(a);
  363. end;
  364. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  365. begin
  366. {$IFOPT R+}
  367. if (a > $FFFF) then
  368. Error(reIntOverflow);
  369. {$ENDIF R+}
  370. TWordRec(Result).byte0 := a.byte0;
  371. TWordRec(Result).byte1 := a.byte1;
  372. end;
  373. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  374. begin
  375. {$IFOPT R+}
  376. if (a > $FF) then
  377. Error(reIntOverflow);
  378. {$ENDIF R+}
  379. Result := a.byte0;
  380. end;
  381. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  382. begin
  383. {$IFOPT R+}
  384. if (a > $FFFFFF) then
  385. Error(reIntOverflow);
  386. {$ENDIF R+}
  387. Result.byte0 := TCardinalRec(a).byte0;
  388. Result.byte1 := TCardinalRec(a).byte1;
  389. Result.byte2 := TCardinalRec(a).byte2;
  390. end;
  391. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  392. begin
  393. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  394. end;
  395. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  396. begin
  397. Result := (TCardinalRec(b).byte3 = 0) and
  398. (a.byte0 = TCardinalRec(b).byte0) and
  399. (a.byte1 = TCardinalRec(b).byte1) and
  400. (a.byte2 = TCardinalRec(b).byte2);
  401. end;
  402. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  403. begin
  404. Result := (b = a);
  405. end;
  406. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  407. begin
  408. Result := (LongInt(a) = b);
  409. end;
  410. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  411. begin
  412. Result := (b = a);
  413. end;
  414. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  415. begin
  416. Result := (a.byte2 = 0) and
  417. (a.byte0 = TWordRec(b).byte0) and
  418. (a.byte1 = TWordRec(b).byte1);
  419. end;
  420. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  421. begin
  422. Result := (b = a);
  423. end;
  424. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  425. begin
  426. Result := (a.byte2 = 0) and
  427. (a.byte1 = 0) and
  428. (a.byte0 = b);
  429. end;
  430. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  431. begin
  432. Result := (b = a);
  433. end;
  434. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  435. begin
  436. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  437. end;
  438. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  439. begin
  440. Result := (TCardinalRec(b).byte3 <> 0) or
  441. (a.byte0 <> TCardinalRec(b).byte0) or
  442. (a.byte1 <> TCardinalRec(b).byte1) or
  443. (a.byte2 <> TCardinalRec(b).byte2);
  444. end;
  445. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  446. begin
  447. Result := (b <> a);
  448. end;
  449. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  450. begin
  451. Result := (a.byte2 > b.byte2) or
  452. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  453. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  454. end;
  455. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  456. begin
  457. Result := Cardinal(a) > b;
  458. end;
  459. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  460. begin
  461. Result := a > Cardinal(b);
  462. end;
  463. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  464. begin
  465. Result := (a.byte2 > b.byte2) or
  466. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  467. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  468. end;
  469. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  470. begin
  471. Result := Cardinal(a) >= b;
  472. end;
  473. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  474. begin
  475. Result := a >= Cardinal(b);
  476. end;
  477. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  478. begin
  479. Result := (b > a);
  480. end;
  481. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  482. begin
  483. Result := Cardinal(a) < b;
  484. end;
  485. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  486. begin
  487. Result := a < Cardinal(b);
  488. end;
  489. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  490. begin
  491. Result := (b >= a);
  492. end;
  493. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  494. begin
  495. Result := Cardinal(a) <= b;
  496. end;
  497. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  498. begin
  499. Result := a <= Cardinal(b);
  500. end;
  501. type
  502. TBitOrder = 0..7;
  503. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  504. begin
  505. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  506. end;
  507. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  508. begin
  509. if AValue then
  510. AData := AData or (1 shl (ABit mod 8))
  511. else
  512. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  513. end;
  514. var
  515. CollationTable : array of PUCA_DataBook;
  516. function IndexOfCollation(const AName : string) : Integer;
  517. var
  518. i, c : Integer;
  519. p : Pointer;
  520. begin
  521. c := Length(AName);
  522. p := @AName[1];
  523. for i := 0 to Length(CollationTable) - 1 do begin
  524. if (Length(CollationTable[i]^.CollationName) = c) and
  525. (CompareByte((CollationTable[i]^.CollationName[1]),p^,c)=0)
  526. then
  527. exit(i);
  528. end;
  529. Result := -1;
  530. end;
  531. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  532. var
  533. i : Integer;
  534. begin
  535. Result := (IndexOfCollation(ACollation^.CollationName) = -1);
  536. if Result then begin
  537. i := Length(CollationTable);
  538. SetLength(CollationTable,(i+1));
  539. CollationTable[i] := ACollation;
  540. end;
  541. end;
  542. function RegisterCollation(const ADirectory, ALanguage : string) : Boolean;
  543. var
  544. cl : PUCA_DataBook;
  545. begin
  546. cl := LoadCollation(ADirectory,ALanguage);
  547. if (cl = nil) then
  548. exit(False);
  549. try
  550. Result := RegisterCollation(cl);
  551. except
  552. FreeCollation(cl);
  553. raise;
  554. end;
  555. if not Result then
  556. FreeCollation(cl);
  557. end;
  558. function UnregisterCollation(const AName : ansistring): Boolean;
  559. var
  560. i, c : Integer;
  561. begin
  562. i := IndexOfCollation(AName);
  563. Result := (i >= 0);
  564. if Result then begin
  565. c := Length(CollationTable);
  566. if (c = 1) then begin
  567. SetLength(CollationTable,0);
  568. end else begin
  569. CollationTable[i] := CollationTable[c-1];
  570. SetLength(CollationTable,(c-1));
  571. end;
  572. end;
  573. end;
  574. procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
  575. var
  576. i : Integer;
  577. cl : PUCA_DataBook;
  578. begin
  579. if AFreeDynamicCollations then begin
  580. for i := Low(CollationTable) to High(CollationTable) do begin
  581. if CollationTable[i].Dynamic then begin
  582. cl := CollationTable[i];
  583. CollationTable[i] := nil;
  584. FreeCollation(cl);
  585. end;
  586. end;
  587. end;
  588. SetLength(CollationTable,0);
  589. end;
  590. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  591. var
  592. i : Integer;
  593. begin
  594. i := IndexOfCollation(AName);
  595. if (i = -1) then
  596. Result := nil
  597. else
  598. Result := CollationTable[i];
  599. end;
  600. function GetCollationCount() : Integer;
  601. begin
  602. Result := Length(CollationTable);
  603. end;
  604. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  605. begin
  606. if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
  607. Result := nil
  608. else
  609. Result := CollationTable[AIndex];
  610. end;
  611. procedure PrepareCollation(
  612. ACollation : PUCA_DataBook;
  613. const ABaseName : ansistring;
  614. const AChangedFields : TCollationFields
  615. );
  616. var
  617. s : ansistring;
  618. p, base : PUCA_DataBook;
  619. begin
  620. if (ABaseName <> '') then
  621. s := ABaseName
  622. else
  623. s := ROOT_COLLATION_NAME;
  624. p := ACollation;
  625. base := FindCollation(s);
  626. if (base = nil) then
  627. Error(reCodesetConversion);
  628. p^.Base := base;
  629. if not(TCollationField.BackWard in AChangedFields) then
  630. p^.Backwards := base^.Backwards;
  631. if not(TCollationField.VariableLowLimit in AChangedFields) then
  632. p^.VariableLowLimit := base^.VariableLowLimit;
  633. if not(TCollationField.VariableHighLimit in AChangedFields) then
  634. p^.VariableLowLimit := base^.VariableHighLimit;
  635. end;
  636. type
  637. TSerializedCollationHeader = packed record
  638. Base : TCollationName;
  639. Version : TCollationName;
  640. CollationName : TCollationName;
  641. VariableWeight : Byte;
  642. Backwards : Byte;
  643. BMP_Table1Length : DWord;
  644. BMP_Table2Length : DWord;
  645. OBMP_Table1Length : DWord;
  646. OBMP_Table2Length : DWord;
  647. PropCount : DWord;
  648. VariableLowLimit : Word;
  649. VariableHighLimit : Word;
  650. ChangedFields : Byte;
  651. end;
  652. PSerializedCollationHeader = ^TSerializedCollationHeader;
  653. procedure FreeCollation(AItem : PUCA_DataBook);
  654. var
  655. h : PSerializedCollationHeader;
  656. begin
  657. if (AItem = nil) or not(AItem^.Dynamic) then
  658. exit;
  659. h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
  660. if (AItem^.BMP_Table1 <> nil) then
  661. FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
  662. if (AItem^.BMP_Table2 <> nil) then
  663. FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
  664. if (AItem^.OBMP_Table1 <> nil) then
  665. FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
  666. if (AItem^.OBMP_Table2 <> nil) then
  667. FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
  668. if (AItem^.Props <> nil) then
  669. FreeMem(AItem^.Props,h^.PropCount);
  670. FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  671. end;
  672. function LoadCollation(
  673. const AData : Pointer;
  674. const ADataLength : Integer
  675. ) : PUCA_DataBook;
  676. var
  677. dataPointer : PByte;
  678. readedLength : LongInt;
  679. function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
  680. begin
  681. Result := (readedLength + ALength) <= ADataLength;
  682. if not result then
  683. exit;
  684. Move(dataPointer^,ADest^,ALength);
  685. Inc(dataPointer,ALength);
  686. readedLength := readedLength + ALength;
  687. end;
  688. var
  689. r : PUCA_DataBook;
  690. h : PSerializedCollationHeader;
  691. cfs : TCollationFields;
  692. i : Integer;
  693. baseName : TCollationName;
  694. begin
  695. readedLength := 0;
  696. dataPointer := AData;
  697. r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  698. try
  699. h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
  700. if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
  701. exit;
  702. r^.Version := h^.Version;
  703. r^.CollationName := h^.CollationName;
  704. r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
  705. r^.Backwards[0] := IsBitON(h^.Backwards,0);
  706. r^.Backwards[1] := IsBitON(h^.Backwards,1);
  707. r^.Backwards[2] := IsBitON(h^.Backwards,2);
  708. r^.Backwards[3] := IsBitON(h^.Backwards,3);
  709. if (h^.BMP_Table1Length > 0) then begin
  710. r^.BMP_Table1 := GetMem(h^.BMP_Table1Length);
  711. if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
  712. exit;
  713. end;
  714. if (h^.BMP_Table2Length > 0) then begin
  715. r^.BMP_Table2 := GetMem(h^.BMP_Table2Length);
  716. if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
  717. exit;
  718. end;
  719. if (h^.OBMP_Table1Length > 0) then begin
  720. r^.OBMP_Table1 := GetMem(h^.OBMP_Table1Length);
  721. if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
  722. exit;
  723. end;
  724. if (h^.OBMP_Table2Length > 0) then begin
  725. r^.OBMP_Table2 := GetMem(h^.OBMP_Table2Length);
  726. if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
  727. exit;
  728. end;
  729. r^.PropCount := h^.PropCount;
  730. if (h^.PropCount > 0) then begin
  731. r^.Props := GetMem(h^.PropCount);
  732. if not ReadBuffer(r^.Props,h^.PropCount) then
  733. exit;
  734. end;
  735. r^.VariableLowLimit := h^.VariableLowLimit;
  736. r^.VariableHighLimit := h^.VariableHighLimit;
  737. cfs := [];
  738. for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
  739. if IsBitON(h^.ChangedFields,i) then
  740. cfs := cfs + [TCollationField(i)];
  741. end;
  742. if (h^.Base <> '') then
  743. baseName := h^.Base
  744. else if (h^.CollationName <> ROOT_COLLATION_NAME) then
  745. baseName := ROOT_COLLATION_NAME
  746. else
  747. baseName := '';
  748. if (baseName <> '') then
  749. PrepareCollation(r,baseName,cfs);
  750. r^.Dynamic := True;
  751. Result := r;
  752. except
  753. FreeCollation(r);
  754. raise;
  755. end;
  756. end;
  757. {$PUSH}
  758. function LoadCollation(const AFileName : string) : PUCA_DataBook;
  759. const
  760. BLOCK_SIZE = 16*1024;
  761. var
  762. f : File of Byte;
  763. locSize, locReaded, c : LongInt;
  764. locBuffer : PByte;
  765. locBlockSize : LongInt;
  766. begin
  767. Result := nil;
  768. {$I-}
  769. if (AFileName = '') then
  770. exit;
  771. Assign(f,AFileName);
  772. Reset(f);
  773. try
  774. if (IOResult <> 0) then
  775. exit;
  776. locSize := FileSize(f);
  777. if (locSize < SizeOf(TSerializedCollationHeader)) then
  778. exit;
  779. locBuffer := GetMem(locSize);
  780. try
  781. locBlockSize := BLOCK_SIZE;
  782. locReaded := 0;
  783. while (locReaded < locSize) do begin
  784. if (locBlockSize > (locSize-locReaded)) then
  785. locBlockSize := locSize-locReaded;
  786. BlockRead(f,locBuffer[locReaded],locBlockSize,c);
  787. if (IOResult <> 0) or (c <= 0) then
  788. exit;
  789. locReaded := locReaded + c;
  790. end;
  791. Result := LoadCollation(locBuffer,locSize);
  792. finally
  793. FreeMem(locBuffer,locSize);
  794. end;
  795. finally
  796. Close(f);
  797. end;
  798. end;
  799. {$POP}
  800. function LoadCollation(const ADirectory, ALanguage : string) : PUCA_DataBook;
  801. var
  802. fileName : string;
  803. begin
  804. fileName := ADirectory;
  805. if (fileName <> '') then begin
  806. if (fileName[Length(fileName)] <> DirectorySeparator) then
  807. fileName := fileName + DirectorySeparator;
  808. end;
  809. fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
  810. Result := LoadCollation(fileName);
  811. end;
  812. {$INCLUDE unicodedata.inc}
  813. {$IFDEF ENDIAN_LITTLE}
  814. {$INCLUDE unicodedata_le.inc}
  815. {$ENDIF ENDIAN_LITTLE}
  816. {$IFDEF ENDIAN_BIG}
  817. {$INCLUDE unicodedata_be.inc}
  818. {$ENDIF ENDIAN_BIG}
  819. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  820. begin
  821. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  822. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  823. end;
  824. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  825. begin
  826. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  827. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  828. end;
  829. function UnicodeIsSurrogatePair(
  830. const AHighSurrogate,
  831. ALowSurrogate : UnicodeChar
  832. ) : Boolean;
  833. begin
  834. Result :=
  835. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  836. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  837. ) and
  838. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  839. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  840. )
  841. end;
  842. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  843. begin
  844. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  845. (Word(AValue) <= HIGH_SURROGATE_END);
  846. end;
  847. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  848. begin
  849. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  850. (Word(AValue) <= LOW_SURROGATE_END);
  851. end;
  852. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  853. begin
  854. Result:=
  855. @UC_PROP_ARRAY[
  856. UC_TABLE_3[
  857. UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
  858. [lo(ACodePoint) shr 4]
  859. ][lo(ACodePoint) and $F]
  860. ]; {
  861. @UC_PROP_ARRAY[
  862. UC_TABLE_2[
  863. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  864. WordRec(ACodePoint).Lo
  865. ]
  866. ];}
  867. end;
  868. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  869. begin
  870. Result:=
  871. @UC_PROP_ARRAY[
  872. UCO_TABLE_3[
  873. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  874. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  875. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  876. ]; {
  877. Result:=
  878. @UC_PROP_ARRAY[
  879. UCO_TABLE_2[
  880. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  881. Word(ALowS) - LOW_SURROGATE_BEGIN
  882. ]
  883. ]; }
  884. end;
  885. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  886. var
  887. l, h : UnicodeChar;
  888. begin
  889. if (ACodePoint <= High(Word)) then
  890. exit(GetProps(Word(ACodePoint)));
  891. FromUCS4(ACodePoint,h,l);
  892. Result := GetProps(h,l);
  893. end;
  894. function UnicodeToUpper(
  895. const AString : UnicodeString;
  896. const AIgnoreInvalidSequence : Boolean;
  897. out AResultString : UnicodeString
  898. ) : Integer;
  899. var
  900. i, c : SizeInt;
  901. pp, pr : PUnicodeChar;
  902. pu : PUC_Prop;
  903. locIsSurrogate : Boolean;
  904. r : UnicodeString;
  905. begin
  906. c := Length(AString);
  907. SetLength(r,2*c);
  908. if (c > 0) then begin
  909. pp := @AString[1];
  910. pr := @r[1];
  911. i := 1;
  912. while (i <= c) do begin
  913. pu := GetProps(Word(pp^));
  914. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  915. if locIsSurrogate then begin
  916. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  917. if AIgnoreInvalidSequence then begin
  918. pr^ := pp^;
  919. Inc(pp);
  920. Inc(pr);
  921. Inc(i);
  922. Continue;
  923. end;
  924. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  925. end;
  926. pu := GetProps(pp^,AString[i+1]);
  927. end;
  928. if (pu^.SimpleUpperCase = 0) then begin
  929. pr^ := pp^;
  930. if locIsSurrogate then begin
  931. Inc(pp);
  932. Inc(pr);
  933. Inc(i);
  934. pr^ := pp^;
  935. end;
  936. end else begin
  937. if (pu^.SimpleUpperCase <= $FFFF) then begin
  938. pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
  939. end else begin
  940. FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  941. Inc(pr);
  942. end;
  943. if locIsSurrogate then begin
  944. Inc(pp);
  945. Inc(i);
  946. end;
  947. end;
  948. Inc(pp);
  949. Inc(pr);
  950. Inc(i);
  951. end;
  952. Dec(pp);
  953. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  954. SetLength(r,i);
  955. AResultString := r;
  956. end;
  957. Result := 0;
  958. end;
  959. function UnicodeToLower(
  960. const AString : UnicodeString;
  961. const AIgnoreInvalidSequence : Boolean;
  962. out AResultString : UnicodeString
  963. ) : Integer;
  964. var
  965. i, c : SizeInt;
  966. pp, pr : PUnicodeChar;
  967. pu : PUC_Prop;
  968. locIsSurrogate : Boolean;
  969. r : UnicodeString;
  970. begin
  971. c := Length(AString);
  972. SetLength(r,2*c);
  973. if (c > 0) then begin
  974. pp := @AString[1];
  975. pr := @r[1];
  976. i := 1;
  977. while (i <= c) do begin
  978. pu := GetProps(Word(pp^));
  979. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  980. if locIsSurrogate then begin
  981. if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
  982. if AIgnoreInvalidSequence then begin
  983. pr^ := pp^;
  984. Inc(pp);
  985. Inc(pr);
  986. Inc(i);
  987. Continue;
  988. end;
  989. exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
  990. end;
  991. pu := GetProps(pp^,AString[i+1]);
  992. end;
  993. if (pu^.SimpleLowerCase = 0) then begin
  994. pr^ := pp^;
  995. if locIsSurrogate then begin
  996. Inc(pp);
  997. Inc(pr);
  998. Inc(i);
  999. pr^ := pp^;
  1000. end;
  1001. end else begin
  1002. if (pu^.SimpleLowerCase <= $FFFF) then begin
  1003. pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
  1004. end else begin
  1005. FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
  1006. Inc(pr);
  1007. end;
  1008. if locIsSurrogate then begin
  1009. Inc(pp);
  1010. Inc(i);
  1011. end;
  1012. end;
  1013. Inc(pp);
  1014. Inc(pr);
  1015. Inc(i);
  1016. end;
  1017. Dec(pp);
  1018. i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
  1019. SetLength(r,i);
  1020. AResultString := r;
  1021. end;
  1022. Result := 0;
  1023. end;
  1024. //----------------------------------------------------------------------
  1025. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  1026. const
  1027. SBase = $AC00;
  1028. LBase = $1100;
  1029. VBase = $1161;
  1030. TBase = $11A7;
  1031. LCount = 19;
  1032. VCount = 21;
  1033. TCount = 28;
  1034. NCount = VCount * TCount; // 588
  1035. SCount = LCount * NCount; // 11172
  1036. var
  1037. SIndex, L, V, T : Integer;
  1038. begin
  1039. SIndex := AChar - SBase;
  1040. if (SIndex < 0) or (SIndex >= SCount) then begin
  1041. ABuffer^ := AChar;
  1042. exit(1);
  1043. end;
  1044. L := LBase + SIndex div NCount;
  1045. V := VBase + (SIndex mod NCount) div TCount;
  1046. T := TBase + SIndex mod TCount;
  1047. ABuffer[0] := L;
  1048. ABuffer[1] := V;
  1049. Result := 2;
  1050. if (T <> TBase) then begin
  1051. ABuffer[2] := T;
  1052. Inc(Result);
  1053. end;
  1054. end;
  1055. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  1056. var
  1057. locStack : array[0..23] of Cardinal;
  1058. locStackIdx : Integer;
  1059. ResultBuffer : array[0..23] of Cardinal;
  1060. ResultIdx : Integer;
  1061. procedure AddCompositionToStack(const AIndex : Integer);
  1062. var
  1063. pdecIdx : ^TDecompositionIndexRec;
  1064. k, kc : Integer;
  1065. pu : ^UInt24;
  1066. begin
  1067. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  1068. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  1069. kc := pdecIdx^.Length;
  1070. Inc(pu,kc);
  1071. for k := 1 to kc do begin
  1072. Dec(pu);
  1073. locStack[locStackIdx + k] := pu^;
  1074. end;
  1075. locStackIdx := locStackIdx + kc;
  1076. end;
  1077. procedure AddResult(const AChar : Cardinal);inline;
  1078. begin
  1079. Inc(ResultIdx);
  1080. ResultBuffer[ResultIdx] := AChar;
  1081. end;
  1082. function PopStack() : Cardinal;inline;
  1083. begin
  1084. Result := locStack[locStackIdx];
  1085. Dec(locStackIdx);
  1086. end;
  1087. var
  1088. cu : Cardinal;
  1089. decIdx : SmallInt;
  1090. locIsWord : Boolean;
  1091. i : Integer;
  1092. p : PUnicodeChar;
  1093. begin
  1094. ResultIdx := -1;
  1095. locStackIdx := -1;
  1096. AddCompositionToStack(ADecomposeIndex);
  1097. while (locStackIdx >= 0) do begin
  1098. cu := PopStack();
  1099. locIsWord := (cu <= MAX_WORD);
  1100. if locIsWord then
  1101. decIdx := GetProps(Word(cu))^.DecompositionID
  1102. else
  1103. decIdx := GetProps(cu)^.DecompositionID;
  1104. if (decIdx = -1) then
  1105. AddResult(cu)
  1106. else
  1107. AddCompositionToStack(decIdx);
  1108. end;
  1109. p := ABuffer;
  1110. Result := 0;
  1111. for i := 0 to ResultIdx do begin
  1112. cu := ResultBuffer[i];
  1113. if (cu <= MAX_WORD) then begin
  1114. p[0] := UnicodeChar(Word(cu));
  1115. Inc(p);
  1116. end else begin
  1117. FromUCS4(cu,p[0],p[1]);
  1118. Inc(p,2);
  1119. Inc(Result);
  1120. end;
  1121. end;
  1122. Result := Result + ResultIdx + 1;
  1123. end;
  1124. procedure CanonicalOrder(var AString : UnicodeString);
  1125. begin
  1126. CanonicalOrder(@AString[1],Length(AString));
  1127. end;
  1128. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  1129. var
  1130. i, c : SizeInt;
  1131. p, q : PUnicodeChar;
  1132. locIsSurrogateP, locIsSurrogateQ : Boolean;
  1133. procedure Swap();
  1134. var
  1135. t, t1 : UnicodeChar;
  1136. begin
  1137. if not locIsSurrogateP then begin
  1138. if not locIsSurrogateQ then begin
  1139. t := p^;
  1140. p^ := q^;
  1141. q^ := t;
  1142. exit;
  1143. end;
  1144. t := p^;
  1145. p[0] := q[0];
  1146. p[1] := q[1];
  1147. q[1] := t;
  1148. exit;
  1149. end;
  1150. if not locIsSurrogateQ then begin
  1151. t := q[0];
  1152. p[2] := p[1];
  1153. p[1] := p[0];
  1154. p[0] := t;
  1155. exit;
  1156. end;
  1157. t := p[0];
  1158. t1 := p[1];
  1159. p[0] := q[0];
  1160. p[1] := q[1];
  1161. q[0] := t;
  1162. q[1] := t1;
  1163. end;
  1164. var
  1165. pu : PUC_Prop;
  1166. cccp, cccq : Byte;
  1167. begin
  1168. c := ALength;
  1169. if (c < 2) then
  1170. exit;
  1171. p := AStr;
  1172. i := 1;
  1173. while (i < c) do begin
  1174. pu := GetProps(Word(p^));
  1175. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  1176. if locIsSurrogateP then begin
  1177. if (i = (c - 1)) then
  1178. Break;
  1179. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  1180. Inc(p);
  1181. Inc(i);
  1182. Continue;
  1183. end;
  1184. pu := GetProps(p[0],p[1]);
  1185. end;
  1186. if (pu^.CCC > 0) then begin
  1187. cccp := pu^.CCC;
  1188. if locIsSurrogateP then
  1189. q := p + 2
  1190. else
  1191. q := p + 1;
  1192. pu := GetProps(Word(q^));
  1193. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  1194. if locIsSurrogateQ then begin
  1195. if (i = c) then
  1196. Break;
  1197. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  1198. Inc(p);
  1199. Inc(i);
  1200. Continue;
  1201. end;
  1202. pu := GetProps(q[0],q[1]);
  1203. end;
  1204. cccq := pu^.CCC;
  1205. if (cccq > 0) and (cccp > cccq) then begin
  1206. Swap();
  1207. if (i > 1) then begin
  1208. Dec(p);
  1209. Dec(i);
  1210. pu := GetProps(Word(p^));
  1211. if (pu^.Category = UGC_Surrogate) then begin
  1212. if (i > 1) then begin
  1213. Dec(p);
  1214. Dec(i);
  1215. end;
  1216. end;
  1217. Continue;
  1218. end;
  1219. end;
  1220. end;
  1221. if locIsSurrogateP then begin
  1222. Inc(p);
  1223. Inc(i);
  1224. end;
  1225. Inc(p);
  1226. Inc(i);
  1227. end;
  1228. end;
  1229. //Canonical Decomposition
  1230. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  1231. begin
  1232. Result := NormalizeNFD(@AString[1],Length(AString));
  1233. end;
  1234. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  1235. const MAX_EXPAND = 3;
  1236. var
  1237. i, c, kc, k : SizeInt;
  1238. pp, pr : PUnicodeChar;
  1239. pu : PUC_Prop;
  1240. locIsSurrogate : Boolean;
  1241. cpArray : array[0..7] of Cardinal;
  1242. cp : Cardinal;
  1243. begin
  1244. c := ALength;
  1245. SetLength(Result,(MAX_EXPAND*c));
  1246. if (c > 0) then begin
  1247. pp := AStr;
  1248. pr := @Result[1];
  1249. i := 1;
  1250. while (i <= c) do begin
  1251. pu := GetProps(Word(pp^));
  1252. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  1253. if locIsSurrogate then begin
  1254. if (i = c) then
  1255. Break;
  1256. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  1257. pr^ := pp^;
  1258. Inc(pp);
  1259. Inc(pr);
  1260. Inc(i);
  1261. Continue;
  1262. end;
  1263. pu := GetProps(pp[0],pp[1]);
  1264. end;
  1265. if pu^.HangulSyllable then begin
  1266. if locIsSurrogate then begin
  1267. cp := ToUCS4(pp[0],pp[1]);
  1268. Inc(pp);
  1269. Inc(i);
  1270. end else begin
  1271. cp := Word(pp^);
  1272. end;
  1273. kc := DecomposeHangul(cp,@cpArray[0]);
  1274. for k := 0 to kc - 1 do begin
  1275. if (cpArray[k] <= MAX_WORD) then begin
  1276. pr^ := UnicodeChar(Word(cpArray[k]));
  1277. pr := pr + 1;
  1278. end else begin
  1279. FromUCS4(cpArray[k],pr[0],pr[1]);
  1280. pr := pr + 2;
  1281. end;
  1282. end;
  1283. if (kc > 0) then
  1284. Dec(pr);
  1285. end else begin
  1286. if (pu^.DecompositionID = -1) then begin
  1287. pr^ := pp^;
  1288. if locIsSurrogate then begin
  1289. Inc(pp);
  1290. Inc(pr);
  1291. Inc(i);
  1292. pr^ := pp^;
  1293. end;
  1294. end else begin
  1295. k := Decompose(pu^.DecompositionID,pr);
  1296. pr := pr + (k - 1);
  1297. if locIsSurrogate then begin
  1298. Inc(pp);
  1299. Inc(i);
  1300. end;
  1301. end;
  1302. end;
  1303. Inc(pp);
  1304. Inc(pr);
  1305. Inc(i);
  1306. end;
  1307. Dec(pp);
  1308. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  1309. SetLength(Result,i);
  1310. CanonicalOrder(@Result[1],Length(Result));
  1311. end;
  1312. end;
  1313. { TUCA_PropItemContextTreeNodeRec }
  1314. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  1315. begin
  1316. if (Self.Left = 0) then
  1317. Result := nil
  1318. else
  1319. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  1320. end;
  1321. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  1322. begin
  1323. if (Self.Right = 0) then
  1324. Result := nil
  1325. else
  1326. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  1327. end;
  1328. { TUCA_PropItemContextRec }
  1329. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  1330. begin
  1331. Result := PUInt24(
  1332. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  1333. SizeOf(Self.WeightCount)
  1334. );
  1335. end;
  1336. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  1337. begin
  1338. Result := PUCA_PropWeights(
  1339. PtrUInt(@Self) +
  1340. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  1341. (Self.CodePointCount*SizeOf(UInt24))
  1342. );
  1343. end;
  1344. { TUCA_PropItemContextTreeRec }
  1345. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  1346. begin
  1347. if (Size = 0) then
  1348. Result := nil
  1349. else
  1350. Result := PUCA_PropItemContextTreeNodeRec(
  1351. PtrUInt(
  1352. PtrUInt(@Self) + SizeOf(UInt24){Size}
  1353. )
  1354. );
  1355. end;
  1356. function CompareCodePoints(
  1357. A : PUInt24; LA : Integer;
  1358. B : PUInt24; LB : Integer
  1359. ) : Integer;
  1360. var
  1361. i, hb : Integer;
  1362. begin
  1363. if (A = B) then
  1364. exit(0);
  1365. Result := 1;
  1366. hb := LB - 1;
  1367. for i := 0 to LA - 1 do begin
  1368. if (i > hb) then
  1369. exit;
  1370. if (A[i] < B[i]) then
  1371. exit(-1);
  1372. if (A[i] > B[i]) then
  1373. exit(1);
  1374. end;
  1375. if (LA = LB) then
  1376. exit(0);
  1377. exit(-1);
  1378. end;
  1379. function TUCA_PropItemContextTreeRec.Find(
  1380. const AChars : PUInt24;
  1381. const ACharCount : Integer;
  1382. out ANode : PUCA_PropItemContextTreeNodeRec
  1383. ) : Boolean;
  1384. var
  1385. t : PUCA_PropItemContextTreeNodeRec;
  1386. begin
  1387. t := Data;
  1388. while (t <> nil) do begin
  1389. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  1390. 0 : Break;
  1391. -1 : t := t^.GetLeftNode();
  1392. else
  1393. t := t^.GetRightNode();
  1394. end;
  1395. end;
  1396. Result := (t <> nil);
  1397. if Result then
  1398. ANode := t;
  1399. end;
  1400. { TUC_Prop }
  1401. function TUC_Prop.GetCategory: Byte;
  1402. begin
  1403. Result := Byte((CategoryData and Byte($F8)) shr 3);
  1404. end;
  1405. function TUC_Prop.GetNumericValue: Double;
  1406. begin
  1407. Result := UC_NUMERIC_ARRAY[NumericIndex];
  1408. end;
  1409. procedure TUC_Prop.SetCategory(AValue: Byte);
  1410. begin
  1411. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  1412. end;
  1413. function TUC_Prop.GetWhiteSpace: Boolean;
  1414. begin
  1415. Result := IsBitON(CategoryData,0);
  1416. end;
  1417. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  1418. begin
  1419. SetBit(CategoryData,0,AValue);
  1420. end;
  1421. function TUC_Prop.GetHangulSyllable: Boolean;
  1422. begin
  1423. Result := IsBitON(CategoryData,1);
  1424. end;
  1425. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  1426. begin
  1427. SetBit(CategoryData,1,AValue);
  1428. end;
  1429. { TUCA_DataBook }
  1430. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  1431. begin
  1432. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  1433. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  1434. end;
  1435. { TUCA_PropItemRec }
  1436. function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
  1437. begin
  1438. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
  1439. end;
  1440. function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
  1441. begin
  1442. Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
  1443. end;
  1444. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  1445. begin
  1446. if HasCodePoint() then begin
  1447. if Contextual then
  1448. Result := Unaligned(
  1449. PUInt24(
  1450. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  1451. Cardinal(GetContext()^.Size)
  1452. )^
  1453. )
  1454. else
  1455. Result := Unaligned(PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^)
  1456. end else begin
  1457. {$ifdef uni_debug}
  1458. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  1459. {$else uni_debug}
  1460. Result := ZERO_UINT24;
  1461. {$endif uni_debug}
  1462. end
  1463. end;
  1464. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  1465. begin
  1466. Result := IsBitON(Flags,FLAG_CODEPOINT);
  1467. end;
  1468. function TUCA_PropItemRec.IsValid() : Boolean;
  1469. begin
  1470. Result := IsBitON(Flags,FLAG_VALID);
  1471. end;
  1472. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  1473. begin
  1474. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1475. end;}
  1476. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  1477. var
  1478. c : Integer;
  1479. p : PByte;
  1480. pd : PUCA_PropWeights;
  1481. begin
  1482. c := WeightLength;
  1483. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1484. pd := ADest;
  1485. pd^.Weights[0] := Unaligned(PWord(p)^);
  1486. p := p + 2;
  1487. if not IsWeightCompress_1() then begin
  1488. pd^.Weights[1] := Unaligned(PWord(p)^);
  1489. p := p + 2;
  1490. end else begin
  1491. pd^.Weights[1] := p^;
  1492. p := p + 1;
  1493. end;
  1494. if not IsWeightCompress_2() then begin
  1495. pd^.Weights[2] := Unaligned(PWord(p)^);
  1496. p := p + 2;
  1497. end else begin
  1498. pd^.Weights[2] := p^;
  1499. p := p + 1;
  1500. end;
  1501. if (c > 1) then
  1502. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  1503. end;
  1504. function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
  1505. begin
  1506. Result := SizeOf(TUCA_PropItemRec);
  1507. if (WeightLength > 0) then begin
  1508. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  1509. if IsWeightCompress_1() then
  1510. Result := Result - 1;
  1511. if IsWeightCompress_2() then
  1512. Result := Result - 1;
  1513. end;
  1514. if HasCodePoint() then
  1515. Result := Result + SizeOf(UInt24);
  1516. if Contextual then
  1517. Result := Result + Cardinal(GetContext()^.Size);
  1518. end;
  1519. function TUCA_PropItemRec.GetContextual: Boolean;
  1520. begin
  1521. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  1522. end;
  1523. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  1524. var
  1525. p : PtrUInt;
  1526. begin
  1527. if not Contextual then
  1528. exit(nil);
  1529. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  1530. if IsBitON(Flags,FLAG_CODEPOINT) then
  1531. p := p + SizeOf(UInt24);
  1532. Result := PUCA_PropItemContextTreeRec(p);
  1533. end;
  1534. function TUCA_PropItemRec.IsDeleted() : Boolean;
  1535. begin
  1536. Result := IsBitON(Flags,FLAG_DELETION);
  1537. end;
  1538. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  1539. var
  1540. i : Cardinal;
  1541. begin
  1542. if (ABook^.BMP_Table2 = nil) then
  1543. exit(nil);
  1544. i := ABook^.BMP_Table2[
  1545. (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
  1546. Lo(Word(AChar))
  1547. ];
  1548. if (i > 0) then
  1549. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1550. else
  1551. Result := nil;
  1552. end;
  1553. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  1554. var
  1555. i : Cardinal;
  1556. begin
  1557. if (ABook^.OBMP_Table2 = nil) then
  1558. exit(nil);
  1559. i := ABook^.OBMP_Table2[
  1560. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1561. Word(ALowS) - LOW_SURROGATE_BEGIN
  1562. ];
  1563. if (i > 0) then
  1564. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1565. else
  1566. Result := nil;
  1567. end;
  1568. {$include weight_derivation.inc}
  1569. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  1570. var
  1571. bb : TUCASortKey;
  1572. begin
  1573. SetLength(bb,Length(B));
  1574. if (Length(bb) > 0) then
  1575. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  1576. Result := CompareSortKey(A,bb);
  1577. end;
  1578. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  1579. var
  1580. i, hb : Integer;
  1581. begin
  1582. if (Pointer(A) = Pointer(B)) then
  1583. exit(0);
  1584. Result := 1;
  1585. hb := Length(B) - 1;
  1586. for i := 0 to Length(A) - 1 do begin
  1587. if (i > hb) then
  1588. exit;
  1589. if (A[i] < B[i]) then
  1590. exit(-1);
  1591. if (A[i] > B[i]) then
  1592. exit(1);
  1593. end;
  1594. if (Length(A) = Length(B)) then
  1595. exit(0);
  1596. exit(-1);
  1597. end;
  1598. type
  1599. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  1600. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1601. var
  1602. r : TUCASortKey;
  1603. i, c, k, ral, levelCount : Integer;
  1604. pce : PUCA_PropWeights;
  1605. begin
  1606. c := Length(ACEList);
  1607. if (c = 0) then
  1608. exit(nil);
  1609. levelCount := Length(ACEList[0].Weights);
  1610. SetLength(r,(levelCount*c + levelCount));
  1611. ral := 0;
  1612. for i := 0 to levelCount - 1 do begin
  1613. if not ACollation^.Backwards[i] then begin
  1614. pce := @ACEList[0];
  1615. for k := 0 to c - 1 do begin
  1616. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1617. r[ral] := pce^.Weights[i];
  1618. ral := ral + 1;
  1619. end;
  1620. pce := pce + 1;
  1621. end;
  1622. end else begin
  1623. pce := @ACEList[c-1];
  1624. for k := 0 to c - 1 do begin
  1625. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1626. r[ral] := pce^.Weights[i];
  1627. ral := ral + 1;
  1628. end;
  1629. pce := pce - 1;
  1630. end;
  1631. end;
  1632. r[ral] := 0;
  1633. ral := ral + 1;
  1634. end;
  1635. ral := ral - 1;
  1636. SetLength(r,ral);
  1637. Result := r;
  1638. end;
  1639. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1640. var
  1641. r : TUCASortKey;
  1642. i, c, k, ral, levelCount : Integer;
  1643. pce : PUCA_PropWeights;
  1644. begin
  1645. c := Length(ACEList);
  1646. if (c = 0) then
  1647. exit(nil);
  1648. levelCount := Length(ACEList[0].Weights);
  1649. SetLength(r,(levelCount*c + levelCount));
  1650. ral := 0;
  1651. for i := 0 to levelCount - 1 do begin
  1652. if not ACollation^.Backwards[i] then begin
  1653. pce := @ACEList[0];
  1654. for k := 0 to c - 1 do begin
  1655. if (pce^.Weights[i] <> 0) then begin
  1656. r[ral] := pce^.Weights[i];
  1657. ral := ral + 1;
  1658. end;
  1659. pce := pce + 1;
  1660. end;
  1661. end else begin
  1662. pce := @ACEList[c-1];
  1663. for k := 0 to c - 1 do begin
  1664. if (pce^.Weights[i] <> 0) then begin
  1665. r[ral] := pce^.Weights[i];
  1666. ral := ral + 1;
  1667. end;
  1668. pce := pce - 1;
  1669. end;
  1670. end;
  1671. r[ral] := 0;
  1672. ral := ral + 1;
  1673. end;
  1674. ral := ral - 1;
  1675. SetLength(r,ral);
  1676. Result := r;
  1677. end;
  1678. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1679. var
  1680. r : TUCASortKey;
  1681. i, c, k, ral, levelCount : Integer;
  1682. pce : PUCA_PropWeights;
  1683. variableState : Boolean;
  1684. begin
  1685. c := Length(ACEList);
  1686. if (c = 0) then
  1687. exit(nil);
  1688. levelCount := Length(ACEList[0].Weights);
  1689. SetLength(r,(levelCount*c + levelCount));
  1690. ral := 0;
  1691. for i := 0 to levelCount - 1 do begin
  1692. if not ACollation^.Backwards[i] then begin
  1693. variableState := False;
  1694. pce := @ACEList[0];
  1695. for k := 0 to c - 1 do begin
  1696. if not ACollation^.IsVariable(pce) then begin
  1697. if (pce^.Weights[0] <> 0) then
  1698. variableState := False;
  1699. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1700. r[ral] := pce^.Weights[i];
  1701. ral := ral + 1;
  1702. end;
  1703. end else begin
  1704. variableState := True;
  1705. end;
  1706. pce := pce + 1;
  1707. end;
  1708. end else begin
  1709. pce := @ACEList[c-1];
  1710. for k := 0 to c - 1 do begin
  1711. if not ACollation^.IsVariable(pce) then begin
  1712. if (pce^.Weights[0] <> 0) then
  1713. variableState := False;
  1714. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1715. r[ral] := pce^.Weights[i];
  1716. ral := ral + 1;
  1717. end;
  1718. end else begin
  1719. variableState := True;
  1720. end;
  1721. pce := pce - 1;
  1722. end;
  1723. end;
  1724. r[ral] := 0;
  1725. ral := ral + 1;
  1726. end;
  1727. ral := ral - 1;
  1728. SetLength(r,ral);
  1729. Result := r;
  1730. end;
  1731. function FormKeyShiftedTrimmed(
  1732. const ACEList : TUCA_PropWeightsArray;
  1733. const ACollation : PUCA_DataBook
  1734. ) : TUCASortKey;
  1735. var
  1736. i : Integer;
  1737. p : ^TUCASortKeyItem;
  1738. begin
  1739. Result := FormKeyShifted(ACEList,ACollation);
  1740. i := Length(Result) - 1;
  1741. if (i >= 0) then begin
  1742. p := @Result[i];
  1743. while (i >= 0) do begin
  1744. if (p^ <> $FFFF) then
  1745. Break;
  1746. Dec(i);
  1747. Dec(p);
  1748. end;
  1749. if ((i+1) < Length(Result)) then
  1750. SetLength(Result,(i+1));
  1751. end;
  1752. end;
  1753. function FindChild(
  1754. const ACodePoint : Cardinal;
  1755. const AParent : PUCA_PropItemRec
  1756. ) : PUCA_PropItemRec;inline;
  1757. var
  1758. k : Integer;
  1759. begin
  1760. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  1761. for k := 0 to AParent^.ChildCount - 1 do begin
  1762. if (ACodePoint = Result^.CodePoint) then
  1763. exit;
  1764. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  1765. end;
  1766. Result := nil;
  1767. end;
  1768. function ComputeSortKey(
  1769. const AString : UnicodeString;
  1770. const ACollation : PUCA_DataBook
  1771. ) : TUCASortKey;
  1772. begin
  1773. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  1774. end;
  1775. function ComputeRawSortKey(
  1776. const AStr : PUnicodeChar;
  1777. const ALength : SizeInt;
  1778. const ACollation : PUCA_DataBook
  1779. ) : TUCA_PropWeightsArray;
  1780. var
  1781. r : TUCA_PropWeightsArray;
  1782. ral {used length of "r"}: Integer;
  1783. rl {capacity of "r"} : Integer;
  1784. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1785. begin
  1786. if (rl < AMinGrow) then
  1787. rl := rl + AMinGrow
  1788. else
  1789. rl := 2 * rl;
  1790. SetLength(r,rl);
  1791. end;
  1792. var
  1793. i : Integer;
  1794. s : UnicodeString;
  1795. ps : PUnicodeChar;
  1796. cp : Cardinal;
  1797. cl : PUCA_DataBook;
  1798. pp : PUCA_PropItemRec;
  1799. ppLevel : Byte;
  1800. removedCharIndex : array of DWord;
  1801. removedCharIndexLength : DWord;
  1802. locHistory : array[0..24] of record
  1803. i : Integer;
  1804. cl : PUCA_DataBook;
  1805. pp : PUCA_PropItemRec;
  1806. ppLevel : Byte;
  1807. cp : Cardinal;
  1808. removedCharIndexLength : DWord;
  1809. end;
  1810. locHistoryTop : Integer;
  1811. suppressState : record
  1812. cl : PUCA_DataBook;
  1813. CharCount : Integer;
  1814. end;
  1815. LastKeyOwner : record
  1816. Length : Integer;
  1817. Chars : array[0..24] of UInt24;
  1818. end;
  1819. procedure SaveKeyOwner();
  1820. var
  1821. k : Integer;
  1822. kppLevel : Byte;
  1823. begin
  1824. k := 0;
  1825. kppLevel := High(Byte);
  1826. while (k <= locHistoryTop) do begin
  1827. if (kppLevel <> locHistory[k].ppLevel) then begin
  1828. LastKeyOwner.Chars[k] := locHistory[k].cp;
  1829. kppLevel := locHistory[k].ppLevel;
  1830. end;
  1831. k := k + 1;
  1832. end;
  1833. if (k = 0) or (kppLevel <> ppLevel) then begin
  1834. LastKeyOwner.Chars[k] := cp;
  1835. k := k + 1;
  1836. end;
  1837. LastKeyOwner.Length := k;
  1838. end;
  1839. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1840. begin
  1841. SaveKeyOwner();
  1842. if ((ral + AItem^.WeightLength) > rl) then
  1843. GrowKey(AItem^.WeightLength);
  1844. AItem^.GetWeightArray(@r[ral]);
  1845. ral := ral + AItem^.WeightLength;
  1846. end;
  1847. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  1848. begin
  1849. if ((ral + AItem^.WeightCount) > rl) then
  1850. GrowKey(AItem^.WeightCount);
  1851. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  1852. ral := ral + AItem^.WeightCount;
  1853. end;
  1854. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1855. begin
  1856. SaveKeyOwner();
  1857. if ((ral + 2) > rl) then
  1858. GrowKey();
  1859. DeriveWeight(ACodePoint,@r[ral]);
  1860. ral := ral + 2;
  1861. end;
  1862. procedure RecordDeletion();inline;
  1863. begin
  1864. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  1865. if (suppressState.cl = nil) or
  1866. (suppressState.CharCount > ppLevel)
  1867. then begin
  1868. suppressState.cl := cl;
  1869. suppressState.CharCount := ppLevel;
  1870. end;
  1871. end;
  1872. end;
  1873. procedure RecordStep();inline;
  1874. begin
  1875. Inc(locHistoryTop);
  1876. locHistory[locHistoryTop].i := i;
  1877. locHistory[locHistoryTop].cl := cl;
  1878. locHistory[locHistoryTop].pp := pp;
  1879. locHistory[locHistoryTop].ppLevel := ppLevel;
  1880. locHistory[locHistoryTop].cp := cp;
  1881. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1882. RecordDeletion();
  1883. end;
  1884. procedure ClearHistory();inline;
  1885. begin
  1886. locHistoryTop := -1;
  1887. end;
  1888. function HasHistory() : Boolean;inline;
  1889. begin
  1890. Result := (locHistoryTop >= 0);
  1891. end;
  1892. function GetHistoryLength() : Integer;inline;
  1893. begin
  1894. Result := (locHistoryTop + 1);
  1895. end;
  1896. procedure GoBack();inline;
  1897. begin
  1898. Assert(locHistoryTop >= 0);
  1899. i := locHistory[locHistoryTop].i;
  1900. cp := locHistory[locHistoryTop].cp;
  1901. cl := locHistory[locHistoryTop].cl;
  1902. pp := locHistory[locHistoryTop].pp;
  1903. ppLevel := locHistory[locHistoryTop].ppLevel;
  1904. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1905. ps := @s[i];
  1906. Dec(locHistoryTop);
  1907. end;
  1908. var
  1909. c : Integer;
  1910. lastUnblockedNonstarterCCC : Byte;
  1911. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1912. var
  1913. k : DWord;
  1914. pk : PUnicodeChar;
  1915. puk : PUC_Prop;
  1916. begin
  1917. k := AStartFrom;
  1918. if (k > c) then
  1919. exit(False);
  1920. if (removedCharIndexLength>0) and
  1921. (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0)
  1922. then begin
  1923. exit(False);
  1924. end;
  1925. {if (k = (i+1)) or
  1926. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1927. then
  1928. lastUnblockedNonstarterCCC := 0;}
  1929. pk := @s[k];
  1930. if UnicodeIsHighSurrogate(pk^) then begin
  1931. if (k = c) then
  1932. exit(False);
  1933. if UnicodeIsLowSurrogate(pk[1]) then
  1934. puk := GetProps(pk[0],pk[1])
  1935. else
  1936. puk := GetProps(Word(pk^));
  1937. end else begin
  1938. puk := GetProps(Word(pk^));
  1939. end;
  1940. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  1941. exit(False);
  1942. lastUnblockedNonstarterCCC := puk^.CCC;
  1943. Result := True;
  1944. end;
  1945. procedure RemoveChar(APos : Integer);inline;
  1946. begin
  1947. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1948. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1949. removedCharIndex[removedCharIndexLength] := APos;
  1950. Inc(removedCharIndexLength);
  1951. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  1952. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1953. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1954. removedCharIndex[removedCharIndexLength] := APos+1;
  1955. Inc(removedCharIndexLength);
  1956. end;
  1957. end;
  1958. procedure Inc_I();inline;
  1959. begin
  1960. if (removedCharIndexLength = 0) then begin
  1961. Inc(i);
  1962. Inc(ps);
  1963. exit;
  1964. end;
  1965. while True do begin
  1966. Inc(i);
  1967. Inc(ps);
  1968. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  1969. Break;
  1970. end;
  1971. end;
  1972. var
  1973. surrogateState : Boolean;
  1974. function MoveToNextChar() : Boolean;inline;
  1975. begin
  1976. Result := True;
  1977. if UnicodeIsHighSurrogate(ps[0]) then begin
  1978. if (i = c) then
  1979. exit(False);
  1980. if UnicodeIsLowSurrogate(ps[1]) then begin
  1981. surrogateState := True;
  1982. cp := ToUCS4(ps[0],ps[1]);
  1983. end else begin
  1984. surrogateState := False;
  1985. cp := Word(ps[0]);
  1986. end;
  1987. end else begin
  1988. surrogateState := False;
  1989. cp := Word(ps[0]);
  1990. end;
  1991. end;
  1992. procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
  1993. begin
  1994. cl := nil;
  1995. pp := nil;
  1996. ppLevel := 0;
  1997. if AClearSuppressInfo then begin
  1998. suppressState.cl := nil;
  1999. suppressState.CharCount := 0;
  2000. end;
  2001. end;
  2002. function FindPropUCA() : Boolean;
  2003. var
  2004. candidateCL : PUCA_DataBook;
  2005. begin
  2006. pp := nil;
  2007. if (cl = nil) then
  2008. candidateCL := ACollation
  2009. else
  2010. candidateCL := cl;
  2011. if surrogateState then begin
  2012. while (candidateCL <> nil) do begin
  2013. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  2014. if (pp <> nil) then
  2015. break;
  2016. candidateCL := candidateCL^.Base;
  2017. end;
  2018. end else begin
  2019. while (candidateCL <> nil) do begin
  2020. pp := GetPropUCA(ps[0],candidateCL);
  2021. if (pp <> nil) then
  2022. break;
  2023. candidateCL := candidateCL^.Base;
  2024. end;
  2025. end;
  2026. cl := candidateCL;
  2027. Result := (pp <> nil);
  2028. end;
  2029. procedure AddWeightsAndClear();inline;
  2030. var
  2031. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2032. begin
  2033. if (pp^.WeightLength > 0) then begin
  2034. AddWeights(pp);
  2035. end else
  2036. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2037. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2038. (ctxNode^.Data.WeightCount > 0)
  2039. then begin
  2040. AddContextWeights(@ctxNode^.Data);
  2041. end;
  2042. //AddWeights(pp);
  2043. ClearHistory();
  2044. ClearPP();
  2045. end;
  2046. procedure StartMatch();
  2047. procedure HandleLastChar();
  2048. var
  2049. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2050. begin
  2051. while True do begin
  2052. if pp^.IsValid() then begin
  2053. if (pp^.WeightLength > 0) then
  2054. AddWeights(pp)
  2055. else
  2056. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2057. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2058. (ctxNode^.Data.WeightCount > 0)
  2059. then
  2060. AddContextWeights(@ctxNode^.Data)
  2061. else
  2062. AddComputedWeights(cp){handle deletion of code point};
  2063. break;
  2064. end;
  2065. if (cl^.Base = nil) then begin
  2066. AddComputedWeights(cp);
  2067. break;
  2068. end;
  2069. cl := cl^.Base;
  2070. if not FindPropUCA() then begin
  2071. AddComputedWeights(cp);
  2072. break;
  2073. end;
  2074. end;
  2075. end;
  2076. var
  2077. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2078. begin
  2079. ppLevel := 0;
  2080. if not FindPropUCA() then begin
  2081. AddComputedWeights(cp);
  2082. ClearHistory();
  2083. ClearPP();
  2084. end else begin
  2085. if (i = c) then begin
  2086. HandleLastChar();
  2087. end else begin
  2088. if pp^.IsValid()then begin
  2089. if (pp^.ChildCount = 0) then begin
  2090. if (pp^.WeightLength > 0) then
  2091. AddWeights(pp)
  2092. else
  2093. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2094. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  2095. (tmpCtxNode^.Data.WeightCount > 0)
  2096. then
  2097. AddContextWeights(@tmpCtxNode^.Data)
  2098. else
  2099. AddComputedWeights(cp){handle deletion of code point};
  2100. ClearPP();
  2101. ClearHistory();
  2102. end else begin
  2103. RecordStep();
  2104. end
  2105. end else begin
  2106. if (pp^.ChildCount = 0) then begin
  2107. AddComputedWeights(cp);
  2108. ClearPP();
  2109. ClearHistory();
  2110. end else begin
  2111. RecordStep();
  2112. end;
  2113. end ;
  2114. end;
  2115. end;
  2116. end;
  2117. function TryPermutation() : Boolean;
  2118. var
  2119. kk : Integer;
  2120. b : Boolean;
  2121. puk : PUC_Prop;
  2122. ppk : PUCA_PropItemRec;
  2123. begin
  2124. Result := False;
  2125. puk := GetProps(cp);
  2126. if (puk^.CCC = 0) then
  2127. exit;
  2128. lastUnblockedNonstarterCCC := puk^.CCC;
  2129. if surrogateState then
  2130. kk := i + 2
  2131. else
  2132. kk := i + 1;
  2133. while IsUnblockedNonstarter(kk) do begin
  2134. b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
  2135. if b then
  2136. ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
  2137. else
  2138. ppk := FindChild(Word(s[kk]),pp);
  2139. if (ppk <> nil) then begin
  2140. pp := ppk;
  2141. RemoveChar(kk);
  2142. Inc(ppLevel);
  2143. RecordStep();
  2144. Result := True;
  2145. if (pp^.ChildCount = 0 ) then
  2146. Break;
  2147. end;
  2148. if b then
  2149. Inc(kk);
  2150. Inc(kk);
  2151. end;
  2152. end;
  2153. procedure AdvanceCharPos();inline;
  2154. begin
  2155. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  2156. Inc(i);
  2157. Inc(ps);
  2158. end;
  2159. Inc_I();
  2160. end;
  2161. var
  2162. ok : Boolean;
  2163. pp1 : PUCA_PropItemRec;
  2164. cltemp : PUCA_DataBook;
  2165. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2166. begin
  2167. if (ALength = 0) then
  2168. exit(nil);
  2169. c := ALength;
  2170. s := NormalizeNFD(AStr,c);
  2171. c := Length(s);
  2172. rl := 3*c;
  2173. SetLength(r,rl);
  2174. ral := 0;
  2175. ps := @s[1];
  2176. ClearPP();
  2177. locHistoryTop := -1;
  2178. removedCharIndexLength := 0;
  2179. FillByte(suppressState,SizeOf(suppressState),0);
  2180. LastKeyOwner.Length := 0;
  2181. i := 1;
  2182. while (i <= c) and MoveToNextChar() do begin
  2183. if (pp = nil) then begin // Start Matching
  2184. StartMatch();
  2185. end else begin
  2186. pp1 := FindChild(cp,pp);
  2187. if (pp1 <> nil) then begin
  2188. Inc(ppLevel);
  2189. pp := pp1;
  2190. if (pp^.ChildCount = 0) or (i = c) then begin
  2191. ok := False;
  2192. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2193. if (pp^.WeightLength > 0) then begin
  2194. AddWeightsAndClear();
  2195. ok := True;
  2196. end else
  2197. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2198. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2199. (ctxNode^.Data.WeightCount > 0)
  2200. then begin
  2201. AddContextWeights(@ctxNode^.Data);
  2202. ClearHistory();
  2203. ClearPP();
  2204. ok := True;
  2205. end
  2206. end;
  2207. if not ok then begin
  2208. RecordDeletion();
  2209. ok := False;
  2210. while HasHistory() do begin
  2211. GoBack();
  2212. if pp^.IsValid() and
  2213. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2214. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2215. )
  2216. then begin
  2217. AddWeightsAndClear();
  2218. ok := True;
  2219. Break;
  2220. end;
  2221. end;
  2222. if not ok then begin
  2223. cltemp := cl^.Base;
  2224. if (cltemp <> nil) then begin
  2225. ClearPP(False);
  2226. cl := cltemp;
  2227. Continue;
  2228. end;
  2229. end;
  2230. if not ok then begin
  2231. AddComputedWeights(cp);
  2232. ClearHistory();
  2233. ClearPP();
  2234. end;
  2235. end;
  2236. end else begin
  2237. RecordStep();
  2238. end;
  2239. end else begin
  2240. // permutations !
  2241. ok := False;
  2242. if TryPermutation() and pp^.IsValid() then begin
  2243. if (suppressState.CharCount = 0) then begin
  2244. AddWeightsAndClear();
  2245. Continue;
  2246. end;
  2247. while True do begin
  2248. if pp^.IsValid() and
  2249. (pp^.WeightLength > 0) and
  2250. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2251. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2252. )
  2253. then begin
  2254. AddWeightsAndClear();
  2255. ok := True;
  2256. break;
  2257. end;
  2258. if not HasHistory() then
  2259. break;
  2260. GoBack();
  2261. if (pp = nil) then
  2262. break;
  2263. end;
  2264. end;
  2265. if not ok then begin
  2266. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2267. if (pp^.WeightLength > 0) then begin
  2268. AddWeightsAndClear();
  2269. ok := True;
  2270. end else
  2271. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2272. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2273. (ctxNode^.Data.WeightCount > 0)
  2274. then begin
  2275. AddContextWeights(@ctxNode^.Data);
  2276. ClearHistory();
  2277. ClearPP();
  2278. ok := True;
  2279. end
  2280. end;
  2281. if ok then
  2282. Continue;
  2283. end;
  2284. if not ok then begin
  2285. if (cl^.Base <> nil) then begin
  2286. cltemp := cl^.Base;
  2287. while HasHistory() do
  2288. GoBack();
  2289. pp := nil;
  2290. ppLevel := 0;
  2291. cl := cltemp;
  2292. Continue;
  2293. end;
  2294. //walk back
  2295. ok := False;
  2296. while HasHistory() do begin
  2297. GoBack();
  2298. if pp^.IsValid() and
  2299. (pp^.WeightLength > 0) and
  2300. ( (suppressState.CharCount = 0) or
  2301. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2302. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2303. )
  2304. )
  2305. then begin
  2306. AddWeightsAndClear();
  2307. ok := True;
  2308. Break;
  2309. end;
  2310. end;
  2311. if ok then begin
  2312. AdvanceCharPos();
  2313. Continue;
  2314. end;
  2315. if (pp <> nil) then begin
  2316. AddComputedWeights(cp);
  2317. ClearHistory();
  2318. ClearPP();
  2319. end;
  2320. end;
  2321. end;
  2322. end;
  2323. if surrogateState then begin
  2324. Inc(ps);
  2325. Inc(i);
  2326. end;
  2327. //
  2328. Inc_I();
  2329. end;
  2330. SetLength(r,ral);
  2331. Result := r;
  2332. end;
  2333. type
  2334. TComputeKeyContext = record
  2335. Collation : PUCA_DataBook;
  2336. r : TUCA_PropWeightsArray;
  2337. ral {used length of "r"}: Integer;
  2338. rl {capacity of "r"} : Integer;
  2339. i : Integer;
  2340. s : UnicodeString;
  2341. ps : PUnicodeChar;
  2342. cp : Cardinal;
  2343. cl : PUCA_DataBook;
  2344. pp : PUCA_PropItemRec;
  2345. ppLevel : Byte;
  2346. removedCharIndex : array of DWord;
  2347. removedCharIndexLength : DWord;
  2348. locHistoryTop : Integer;
  2349. locHistory : array[0..24] of record
  2350. i : Integer;
  2351. cl : PUCA_DataBook;
  2352. pp : PUCA_PropItemRec;
  2353. ppLevel : Byte;
  2354. cp : Cardinal;
  2355. removedCharIndexLength : DWord;
  2356. end;
  2357. suppressState : record
  2358. cl : PUCA_DataBook;
  2359. CharCount : Integer;
  2360. end;
  2361. LastKeyOwner : record
  2362. Length : Integer;
  2363. Chars : array[0..24] of UInt24;
  2364. end;
  2365. c : Integer;
  2366. lastUnblockedNonstarterCCC : Byte;
  2367. surrogateState : Boolean;
  2368. Finished : Boolean;
  2369. end;
  2370. PComputeKeyContext = ^TComputeKeyContext;
  2371. procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
  2372. begin
  2373. AContext^.cl := nil;
  2374. AContext^.pp := nil;
  2375. AContext^.ppLevel := 0;
  2376. if AClearSuppressInfo then begin
  2377. AContext^.suppressState.cl := nil;
  2378. AContext^.suppressState.CharCount := 0;
  2379. end;
  2380. end;
  2381. procedure InitContext(
  2382. AContext : PComputeKeyContext;
  2383. const AStr : PUnicodeChar;
  2384. const ALength : SizeInt;
  2385. const ACollation : PUCA_DataBook
  2386. );
  2387. begin
  2388. AContext^.Collation := ACollation;
  2389. AContext^.c := ALength;
  2390. AContext^.s := NormalizeNFD(AStr,AContext^.c);
  2391. AContext^.c := Length(AContext^.s);
  2392. AContext^.rl := 3*AContext^.c;
  2393. SetLength(AContext^.r,AContext^.rl);
  2394. AContext^.ral := 0;
  2395. AContext^.ps := @AContext^.s[1];
  2396. ClearPP(AContext);
  2397. AContext^.locHistoryTop := -1;
  2398. AContext^.removedCharIndexLength := 0;
  2399. FillByte(AContext^.suppressState,SizeOf(AContext^.suppressState),0);
  2400. AContext^.LastKeyOwner.Length := 0;
  2401. AContext^.i := 1;
  2402. AContext^.Finished := False;
  2403. end;
  2404. function FormKey(
  2405. const AWeightArray : TUCA_PropWeightsArray;
  2406. const ACollation : PUCA_DataBook
  2407. ) : TUCASortKey;inline;
  2408. begin
  2409. case ACollation.VariableWeight of
  2410. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
  2411. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
  2412. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
  2413. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
  2414. else
  2415. Result := FormKeyShifted(AWeightArray,ACollation);
  2416. end;
  2417. end;
  2418. function ComputeRawSortKeyNextItem(
  2419. const AContext : PComputeKeyContext
  2420. ) : Boolean;forward;
  2421. function IncrementalCompareString_NonIgnorable(
  2422. const AStrA : PUnicodeChar;
  2423. const ALengthA : SizeInt;
  2424. const AStrB : PUnicodeChar;
  2425. const ALengthB : SizeInt;
  2426. const ACollation : PUCA_DataBook
  2427. ) : Integer;
  2428. var
  2429. ctxA, ctxB : TComputeKeyContext;
  2430. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  2431. keyIndexB : Integer;
  2432. keyA, keyB : TUCASortKey;
  2433. begin
  2434. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  2435. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  2436. (ALengthA = ALengthB)
  2437. )
  2438. then
  2439. exit(0);
  2440. if (ALengthA = 0) then
  2441. exit(-1);
  2442. if (ALengthB = 0) then
  2443. exit(1);
  2444. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  2445. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  2446. lastKeyIndexA := -1;
  2447. keyIndexA := -1;
  2448. lengthMaxA := 0;
  2449. keyIndexB := -1;
  2450. while True do begin
  2451. if not ComputeRawSortKeyNextItem(@ctxA) then
  2452. Break;
  2453. if (ctxA.ral = lengthMaxA) then
  2454. Continue;
  2455. lengthMaxA := ctxA.ral;
  2456. keyIndexA := lastKeyIndexA + 1;
  2457. while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
  2458. Inc(keyIndexA);
  2459. end;
  2460. if (keyIndexA = lengthMaxA) then begin
  2461. lastKeyIndexA := keyIndexA-1;
  2462. Continue;
  2463. end;
  2464. while (keyIndexA < lengthMaxA) do begin
  2465. if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
  2466. Inc(keyIndexA);
  2467. Continue;
  2468. end;
  2469. Inc(keyIndexB);
  2470. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  2471. if (ctxB.ral <= keyIndexB) then begin
  2472. if not ComputeRawSortKeyNextItem(@ctxB) then
  2473. Break;
  2474. Continue;
  2475. end;
  2476. Inc(keyIndexB);
  2477. end;
  2478. if (ctxB.ral <= keyIndexB) then
  2479. exit(1);
  2480. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  2481. exit(1);
  2482. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  2483. exit(-1);
  2484. Inc(keyIndexA);
  2485. end;
  2486. lastKeyIndexA := keyIndexA - 1;
  2487. end;
  2488. //Key(A) is completed !
  2489. Inc(keyIndexB);
  2490. while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
  2491. if (ctxB.ral <= keyIndexB) then begin
  2492. if not ComputeRawSortKeyNextItem(@ctxB) then
  2493. Break;
  2494. Continue;
  2495. end;
  2496. Inc(keyIndexB);
  2497. end;
  2498. if (ctxB.ral > keyIndexB) then begin
  2499. //B has at least one more primary weight that A
  2500. exit(-1);
  2501. end;
  2502. while ComputeRawSortKeyNextItem(@ctxB) do begin
  2503. //
  2504. end;
  2505. //Key(B) is completed !
  2506. keyA := FormKey(ctxA.r,ctxA.Collation);
  2507. keyB := FormKey(ctxB.r,ctxB.Collation);
  2508. Result := CompareSortKey(keyA,keyB);
  2509. end;
  2510. function IncrementalCompareString_Shift(
  2511. const AStrA : PUnicodeChar;
  2512. const ALengthA : SizeInt;
  2513. const AStrB : PUnicodeChar;
  2514. const ALengthB : SizeInt;
  2515. const ACollation : PUCA_DataBook
  2516. ) : Integer;
  2517. var
  2518. ctxA, ctxB : TComputeKeyContext;
  2519. lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
  2520. keyIndexB : Integer;
  2521. keyA, keyB : TUCASortKey;
  2522. begin
  2523. if ( (ALengthA = 0) and (ALengthB = 0) ) or
  2524. ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
  2525. (ALengthA = ALengthB)
  2526. )
  2527. then
  2528. exit(0);
  2529. if (ALengthA = 0) then
  2530. exit(-1);
  2531. if (ALengthB = 0) then
  2532. exit(1);
  2533. InitContext(@ctxA,AStrA,ALengthA,ACollation);
  2534. InitContext(@ctxB,AStrB,ALengthB,ACollation);
  2535. lastKeyIndexA := -1;
  2536. keyIndexA := -1;
  2537. lengthMaxA := 0;
  2538. keyIndexB := -1;
  2539. while True do begin
  2540. if not ComputeRawSortKeyNextItem(@ctxA) then
  2541. Break;
  2542. if (ctxA.ral = lengthMaxA) then
  2543. Continue;
  2544. lengthMaxA := ctxA.ral;
  2545. keyIndexA := lastKeyIndexA + 1;
  2546. while (keyIndexA < lengthMaxA) and
  2547. ( (ctxA.r[keyIndexA].Weights[0] = 0) or
  2548. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  2549. )
  2550. do begin
  2551. Inc(keyIndexA);
  2552. end;
  2553. if (keyIndexA = lengthMaxA) then begin
  2554. lastKeyIndexA := keyIndexA-1;
  2555. Continue;
  2556. end;
  2557. while (keyIndexA < lengthMaxA) do begin
  2558. if (ctxA.r[keyIndexA].Weights[0] = 0) or
  2559. ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
  2560. then begin
  2561. Inc(keyIndexA);
  2562. Continue;
  2563. end;
  2564. Inc(keyIndexB);
  2565. while (ctxB.ral <= keyIndexB) or
  2566. (ctxB.r[keyIndexB].Weights[0] = 0) or
  2567. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  2568. do begin
  2569. if (ctxB.ral <= keyIndexB) then begin
  2570. if not ComputeRawSortKeyNextItem(@ctxB) then
  2571. Break;
  2572. Continue;
  2573. end;
  2574. Inc(keyIndexB);
  2575. end;
  2576. if (ctxB.ral <= keyIndexB) then
  2577. exit(1);
  2578. if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
  2579. exit(1);
  2580. if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
  2581. exit(-1);
  2582. Inc(keyIndexA);
  2583. end;
  2584. lastKeyIndexA := keyIndexA - 1;
  2585. end;
  2586. //Key(A) is completed !
  2587. Inc(keyIndexB);
  2588. while (ctxB.ral <= keyIndexB) or
  2589. (ctxB.r[keyIndexB].Weights[0] = 0) or
  2590. ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
  2591. do begin
  2592. if (ctxB.ral <= keyIndexB) then begin
  2593. if not ComputeRawSortKeyNextItem(@ctxB) then
  2594. Break;
  2595. Continue;
  2596. end;
  2597. Inc(keyIndexB);
  2598. end;
  2599. if (ctxB.ral > keyIndexB) then begin
  2600. //B has at least one more primary weight that A
  2601. exit(-1);
  2602. end;
  2603. while ComputeRawSortKeyNextItem(@ctxB) do begin
  2604. //
  2605. end;
  2606. //Key(B) is completed !
  2607. keyA := FormKey(ctxA.r,ctxA.Collation);
  2608. keyB := FormKey(ctxB.r,ctxB.Collation);
  2609. Result := CompareSortKey(keyA,keyB);
  2610. end;
  2611. function IncrementalCompareString(
  2612. const AStrA : PUnicodeChar;
  2613. const ALengthA : SizeInt;
  2614. const AStrB : PUnicodeChar;
  2615. const ALengthB : SizeInt;
  2616. const ACollation : PUCA_DataBook
  2617. ) : Integer;
  2618. begin
  2619. case ACollation^.VariableWeight of
  2620. TUCA_VariableKind.ucaNonIgnorable :
  2621. begin
  2622. Result := IncrementalCompareString_NonIgnorable(
  2623. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2624. );
  2625. end;
  2626. TUCA_VariableKind.ucaBlanked,
  2627. TUCA_VariableKind.ucaShiftedTrimmed,
  2628. TUCA_VariableKind.ucaIgnoreSP,
  2629. TUCA_VariableKind.ucaShifted:
  2630. begin
  2631. Result := IncrementalCompareString_Shift(
  2632. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2633. );
  2634. end;
  2635. else
  2636. begin
  2637. Result := IncrementalCompareString_Shift(
  2638. AStrA,ALengthA,AStrB,ALengthB,ACollation
  2639. );
  2640. end;
  2641. end;
  2642. end;
  2643. function IncrementalCompareString(
  2644. const AStrA,
  2645. AStrB : UnicodeString;
  2646. const ACollation : PUCA_DataBook
  2647. ) : Integer;
  2648. begin
  2649. Result := IncrementalCompareString(
  2650. Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
  2651. ACollation
  2652. );
  2653. end;
  2654. function ComputeRawSortKeyNextItem(
  2655. const AContext : PComputeKeyContext
  2656. ) : Boolean;
  2657. var
  2658. ctx : PComputeKeyContext;
  2659. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  2660. begin
  2661. if (ctx^.rl < AMinGrow) then
  2662. ctx^.rl := ctx^.rl + AMinGrow
  2663. else
  2664. ctx^.rl := 2 * ctx^.rl;
  2665. SetLength(ctx^.r,ctx^.rl);
  2666. end;
  2667. procedure SaveKeyOwner();
  2668. var
  2669. k : Integer;
  2670. kppLevel : Byte;
  2671. begin
  2672. k := 0;
  2673. kppLevel := High(Byte);
  2674. while (k <= ctx^.locHistoryTop) do begin
  2675. if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
  2676. ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
  2677. kppLevel := ctx^.locHistory[k].ppLevel;
  2678. end;
  2679. k := k + 1;
  2680. end;
  2681. if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
  2682. ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
  2683. k := k + 1;
  2684. end;
  2685. ctx^.LastKeyOwner.Length := k;
  2686. end;
  2687. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  2688. begin
  2689. SaveKeyOwner();
  2690. if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
  2691. GrowKey(AItem^.WeightLength);
  2692. AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
  2693. ctx^.ral := ctx^.ral + AItem^.WeightLength;
  2694. end;
  2695. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  2696. begin
  2697. if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
  2698. GrowKey(AItem^.WeightCount);
  2699. Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
  2700. ctx^.ral := ctx^.ral + AItem^.WeightCount;
  2701. end;
  2702. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  2703. begin
  2704. SaveKeyOwner();
  2705. if ((ctx^.ral + 2) > ctx^.rl) then
  2706. GrowKey();
  2707. DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
  2708. ctx^.ral := ctx^.ral + 2;
  2709. end;
  2710. procedure RecordDeletion();inline;
  2711. begin
  2712. if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  2713. if (ctx^.suppressState.cl = nil) or
  2714. (ctx^.suppressState.CharCount > ctx^.ppLevel)
  2715. then begin
  2716. ctx^.suppressState.cl := ctx^.cl;
  2717. ctx^.suppressState.CharCount := ctx^.ppLevel;
  2718. end;
  2719. end;
  2720. end;
  2721. procedure RecordStep();inline;
  2722. begin
  2723. Inc(ctx^.locHistoryTop);
  2724. ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
  2725. ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
  2726. ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
  2727. ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
  2728. ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
  2729. ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
  2730. RecordDeletion();
  2731. end;
  2732. procedure ClearHistory();inline;
  2733. begin
  2734. ctx^.locHistoryTop := -1;
  2735. end;
  2736. function HasHistory() : Boolean;inline;
  2737. begin
  2738. Result := (ctx^.locHistoryTop >= 0);
  2739. end;
  2740. function GetHistoryLength() : Integer;inline;
  2741. begin
  2742. Result := (ctx^.locHistoryTop + 1);
  2743. end;
  2744. procedure GoBack();inline;
  2745. begin
  2746. Assert(ctx^.locHistoryTop >= 0);
  2747. ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
  2748. ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
  2749. ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
  2750. ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
  2751. ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
  2752. ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
  2753. ctx^.ps := @ctx^.s[ctx^.i];
  2754. Dec(ctx^.locHistoryTop);
  2755. end;
  2756. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  2757. var
  2758. k : DWord;
  2759. pk : PUnicodeChar;
  2760. puk : PUC_Prop;
  2761. begin
  2762. k := AStartFrom;
  2763. if (k > ctx^.c) then
  2764. exit(False);
  2765. if (ctx^.removedCharIndexLength>0) and
  2766. (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,k) >= 0)
  2767. then begin
  2768. exit(False);
  2769. end;
  2770. {if (k = (i+1)) or
  2771. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  2772. then
  2773. lastUnblockedNonstarterCCC := 0;}
  2774. pk := @ctx^.s[k];
  2775. if UnicodeIsHighSurrogate(pk^) then begin
  2776. if (k = ctx^.c) then
  2777. exit(False);
  2778. if UnicodeIsLowSurrogate(pk[1]) then
  2779. puk := GetProps(pk[0],pk[1])
  2780. else
  2781. puk := GetProps(Word(pk^));
  2782. end else begin
  2783. puk := GetProps(Word(pk^));
  2784. end;
  2785. if (puk^.CCC = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.CCC) then
  2786. exit(False);
  2787. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  2788. Result := True;
  2789. end;
  2790. procedure RemoveChar(APos : Integer);inline;
  2791. begin
  2792. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  2793. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  2794. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
  2795. Inc(ctx^.removedCharIndexLength);
  2796. if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
  2797. if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
  2798. SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
  2799. ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
  2800. Inc(ctx^.removedCharIndexLength);
  2801. end;
  2802. end;
  2803. procedure Inc_I();inline;
  2804. begin
  2805. if (ctx^.removedCharIndexLength = 0) then begin
  2806. Inc(ctx^.i);
  2807. Inc(ctx^.ps);
  2808. exit;
  2809. end;
  2810. while True do begin
  2811. Inc(ctx^.i);
  2812. Inc(ctx^.ps);
  2813. if (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,ctx^.i) = -1) then
  2814. Break;
  2815. end;
  2816. end;
  2817. function MoveToNextChar() : Boolean;inline;
  2818. begin
  2819. Result := True;
  2820. if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
  2821. if (ctx^.i = ctx^.c) then
  2822. exit(False);
  2823. if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  2824. ctx^.surrogateState := True;
  2825. ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
  2826. end else begin
  2827. ctx^.surrogateState := False;
  2828. ctx^.cp := Word(ctx^.ps[0]);
  2829. end;
  2830. end else begin
  2831. ctx^.surrogateState := False;
  2832. ctx^.cp := Word(ctx^.ps[0]);
  2833. end;
  2834. end;
  2835. function FindPropUCA() : Boolean;
  2836. var
  2837. candidateCL : PUCA_DataBook;
  2838. begin
  2839. ctx^.pp := nil;
  2840. if (ctx^.cl = nil) then
  2841. candidateCL := ctx^.Collation
  2842. else
  2843. candidateCL := ctx^.cl;
  2844. if ctx^.surrogateState then begin
  2845. while (candidateCL <> nil) do begin
  2846. ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
  2847. if (ctx^.pp <> nil) then
  2848. break;
  2849. candidateCL := candidateCL^.Base;
  2850. end;
  2851. end else begin
  2852. while (candidateCL <> nil) do begin
  2853. ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
  2854. if (ctx^.pp <> nil) then
  2855. break;
  2856. candidateCL := candidateCL^.Base;
  2857. end;
  2858. end;
  2859. ctx^.cl := candidateCL;
  2860. Result := (ctx^.pp <> nil);
  2861. end;
  2862. procedure AddWeightsAndClear();inline;
  2863. var
  2864. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2865. begin
  2866. if (ctx^.pp^.WeightLength > 0) then begin
  2867. AddWeights(ctx^.pp);
  2868. end else
  2869. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2870. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  2871. (ctxNode^.Data.WeightCount > 0)
  2872. then begin
  2873. AddContextWeights(@ctxNode^.Data);
  2874. end;
  2875. //AddWeights(pp);
  2876. ClearHistory();
  2877. ClearPP(ctx);
  2878. end;
  2879. function StartMatch() : Boolean;
  2880. procedure HandleLastChar();
  2881. var
  2882. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2883. begin
  2884. while True do begin
  2885. if ctx^.pp^.IsValid() then begin
  2886. if (ctx^.pp^.WeightLength > 0) then
  2887. AddWeights(ctx^.pp)
  2888. else
  2889. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2890. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  2891. (ctxNode^.Data.WeightCount > 0)
  2892. then
  2893. AddContextWeights(@ctxNode^.Data)
  2894. else
  2895. AddComputedWeights(ctx^.cp){handle deletion of code point};
  2896. break;
  2897. end;
  2898. if (ctx^.cl^.Base = nil) then begin
  2899. AddComputedWeights(ctx^.cp);
  2900. break;
  2901. end;
  2902. ctx^.cl := ctx^.cl^.Base;
  2903. if not FindPropUCA() then begin
  2904. AddComputedWeights(ctx^.cp);
  2905. break;
  2906. end;
  2907. end;
  2908. end;
  2909. var
  2910. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  2911. begin
  2912. Result := False;
  2913. ctx^.ppLevel := 0;
  2914. if not FindPropUCA() then begin
  2915. AddComputedWeights(ctx^.cp);
  2916. ClearHistory();
  2917. ClearPP(ctx);
  2918. Result := True;
  2919. end else begin
  2920. if (ctx^.i = ctx^.c) then begin
  2921. HandleLastChar();
  2922. Result := True;
  2923. end else begin
  2924. if ctx^.pp^.IsValid()then begin
  2925. if (ctx^.pp^.ChildCount = 0) then begin
  2926. if (ctx^.pp^.WeightLength > 0) then
  2927. AddWeights(ctx^.pp)
  2928. else
  2929. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  2930. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
  2931. (tmpCtxNode^.Data.WeightCount > 0)
  2932. then
  2933. AddContextWeights(@tmpCtxNode^.Data)
  2934. else
  2935. AddComputedWeights(ctx^.cp){handle deletion of code point};
  2936. ClearPP(ctx);
  2937. ClearHistory();
  2938. Result := True;
  2939. end else begin
  2940. RecordStep();
  2941. end
  2942. end else begin
  2943. if (ctx^.pp^.ChildCount = 0) then begin
  2944. AddComputedWeights(ctx^.cp);
  2945. ClearPP(ctx);
  2946. ClearHistory();
  2947. Result := True;
  2948. end else begin
  2949. RecordStep();
  2950. end;
  2951. end;
  2952. end;
  2953. end;
  2954. end;
  2955. function TryPermutation() : Boolean;
  2956. var
  2957. kk : Integer;
  2958. b : Boolean;
  2959. puk : PUC_Prop;
  2960. ppk : PUCA_PropItemRec;
  2961. begin
  2962. Result := False;
  2963. puk := GetProps(ctx^.cp);
  2964. if (puk^.CCC = 0) then
  2965. exit;
  2966. ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
  2967. if ctx^.surrogateState then
  2968. kk := ctx^.i + 2
  2969. else
  2970. kk := ctx^.i + 1;
  2971. while IsUnblockedNonstarter(kk) do begin
  2972. b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
  2973. if b then
  2974. ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
  2975. else
  2976. ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
  2977. if (ppk <> nil) then begin
  2978. ctx^.pp := ppk;
  2979. RemoveChar(kk);
  2980. Inc(ctx^.ppLevel);
  2981. RecordStep();
  2982. Result := True;
  2983. if (ctx^.pp^.ChildCount = 0 ) then
  2984. Break;
  2985. end;
  2986. if b then
  2987. Inc(kk);
  2988. Inc(kk);
  2989. end;
  2990. end;
  2991. procedure AdvanceCharPos();inline;
  2992. begin
  2993. if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
  2994. Inc(ctx^.i);
  2995. Inc(ctx^.ps);
  2996. end;
  2997. Inc_I();
  2998. end;
  2999. var
  3000. ok : Boolean;
  3001. pp1 : PUCA_PropItemRec;
  3002. cltemp : PUCA_DataBook;
  3003. ctxNode : PUCA_PropItemContextTreeNodeRec;
  3004. begin
  3005. if AContext^.Finished then
  3006. exit(False);
  3007. ctx := AContext;
  3008. while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
  3009. ok := False;
  3010. if (ctx^.pp = nil) then begin // Start Matching
  3011. ok := StartMatch();
  3012. end else begin
  3013. pp1 := FindChild(ctx^.cp,ctx^.pp);
  3014. if (pp1 <> nil) then begin
  3015. Inc(ctx^.ppLevel);
  3016. ctx^.pp := pp1;
  3017. if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
  3018. ok := False;
  3019. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3020. if (ctx^.pp^.WeightLength > 0) then begin
  3021. AddWeightsAndClear();
  3022. ok := True;
  3023. end else
  3024. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3025. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3026. (ctxNode^.Data.WeightCount > 0)
  3027. then begin
  3028. AddContextWeights(@ctxNode^.Data);
  3029. ClearHistory();
  3030. ClearPP(ctx);
  3031. ok := True;
  3032. end
  3033. end;
  3034. if not ok then begin
  3035. RecordDeletion();
  3036. while HasHistory() do begin
  3037. GoBack();
  3038. if ctx^.pp^.IsValid() and
  3039. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3040. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3041. )
  3042. then begin
  3043. AddWeightsAndClear();
  3044. ok := True;
  3045. Break;
  3046. end;
  3047. end;
  3048. if not ok then begin
  3049. cltemp := ctx^.cl^.Base;
  3050. if (cltemp <> nil) then begin
  3051. ClearPP(ctx,False);
  3052. ctx^.cl := cltemp;
  3053. Continue;
  3054. end;
  3055. end;
  3056. if not ok then begin
  3057. AddComputedWeights(ctx^.cp);
  3058. ClearHistory();
  3059. ClearPP(ctx);
  3060. ok := True;
  3061. end;
  3062. end;
  3063. end else begin
  3064. RecordStep();
  3065. end;
  3066. end else begin
  3067. // permutations !
  3068. ok := False;
  3069. if TryPermutation() and ctx^.pp^.IsValid() then begin
  3070. if (ctx^.suppressState.CharCount = 0) then begin
  3071. AddWeightsAndClear();
  3072. ok := True;
  3073. exit(True);// Continue;
  3074. end;
  3075. while True do begin
  3076. if ctx^.pp^.IsValid() and
  3077. (ctx^.pp^.WeightLength > 0) and
  3078. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3079. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3080. )
  3081. then begin
  3082. AddWeightsAndClear();
  3083. ok := True;
  3084. break;
  3085. end;
  3086. if not HasHistory() then
  3087. break;
  3088. GoBack();
  3089. if (ctx^.pp = nil) then
  3090. break;
  3091. end;
  3092. end;
  3093. if not ok then begin
  3094. if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
  3095. if (ctx^.pp^.WeightLength > 0) then begin
  3096. AddWeightsAndClear();
  3097. ok := True;
  3098. end else
  3099. if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
  3100. ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
  3101. (ctxNode^.Data.WeightCount > 0)
  3102. then begin
  3103. AddContextWeights(@ctxNode^.Data);
  3104. ClearHistory();
  3105. ClearPP(ctx);
  3106. ok := True;
  3107. end
  3108. end;
  3109. if ok then
  3110. exit(True);// Continue;
  3111. end;
  3112. if not ok then begin
  3113. if (ctx^.cl^.Base <> nil) then begin
  3114. cltemp := ctx^.cl^.Base;
  3115. while HasHistory() do
  3116. GoBack();
  3117. ctx^.pp := nil;
  3118. ctx^.ppLevel := 0;
  3119. ctx^.cl := cltemp;
  3120. Continue;
  3121. end;
  3122. //walk back
  3123. ok := False;
  3124. while HasHistory() do begin
  3125. GoBack();
  3126. if ctx^.pp^.IsValid() and
  3127. (ctx^.pp^.WeightLength > 0) and
  3128. ( (ctx^.suppressState.CharCount = 0) or
  3129. ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
  3130. ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
  3131. )
  3132. )
  3133. then begin
  3134. AddWeightsAndClear();
  3135. ok := True;
  3136. Break;
  3137. end;
  3138. end;
  3139. if ok then begin
  3140. AdvanceCharPos();
  3141. exit(True);// Continue;
  3142. end;
  3143. if (ctx^.pp <> nil) then begin
  3144. AddComputedWeights(ctx^.cp);
  3145. ClearHistory();
  3146. ClearPP(ctx);
  3147. ok := True;
  3148. end;
  3149. end;
  3150. end;
  3151. end;
  3152. if ctx^.surrogateState then begin
  3153. Inc(ctx^.ps);
  3154. Inc(ctx^.i);
  3155. end;
  3156. //
  3157. Inc_I();
  3158. if ok then
  3159. exit(True);
  3160. end;
  3161. SetLength(ctx^.r,ctx^.ral);
  3162. ctx^.Finished := True;
  3163. Result := True;
  3164. end;
  3165. function ComputeSortKey(
  3166. const AStr : PUnicodeChar;
  3167. const ALength : SizeInt;
  3168. const ACollation : PUCA_DataBook
  3169. ) : TUCASortKey;
  3170. var
  3171. r : TUCA_PropWeightsArray;
  3172. begin
  3173. r := ComputeRawSortKey(AStr,ALength,ACollation);
  3174. Result := FormKey(r,ACollation);
  3175. end;
  3176. end.