unicodedata.pas 66 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467
  1. { Unicode tables unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. unit unicodedata;
  17. {$mode delphi}
  18. {$H+}
  19. {$PACKENUM 1}
  20. {$SCOPEDENUMS ON}
  21. {$pointermath on}
  22. { $modeswitch advancedrecords}
  23. { $define uni_debug}
  24. interface
  25. uses
  26. SysUtils;
  27. const
  28. MAX_WORD = High(Word);
  29. LOW_SURROGATE_BEGIN = Word($DC00);
  30. LOW_SURROGATE_END = Word($DFFF);
  31. HIGH_SURROGATE_BEGIN = Word($D800);
  32. HIGH_SURROGATE_END = Word($DBFF);
  33. HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
  34. UCS4_HALF_BASE = LongWord($10000);
  35. UCS4_HALF_MASK = Word($3FF);
  36. MAX_LEGAL_UTF32 = $10FFFF;
  37. const
  38. // Unicode General Category
  39. UGC_UppercaseLetter = 0;
  40. UGC_LowercaseLetter = 1;
  41. UGC_TitlecaseLetter = 2;
  42. UGC_ModifierLetter = 3;
  43. UGC_OtherLetter = 4;
  44. UGC_NonSpacingMark = 5;
  45. UGC_CombiningMark = 6;
  46. UGC_EnclosingMark = 7;
  47. UGC_DecimalNumber = 8;
  48. UGC_LetterNumber = 9;
  49. UGC_OtherNumber = 10;
  50. UGC_ConnectPunctuation = 11;
  51. UGC_DashPunctuation = 12;
  52. UGC_OpenPunctuation = 13;
  53. UGC_ClosePunctuation = 14;
  54. UGC_InitialPunctuation = 15;
  55. UGC_FinalPunctuation = 16;
  56. UGC_OtherPunctuation = 17;
  57. UGC_MathSymbol = 18;
  58. UGC_CurrencySymbol = 19;
  59. UGC_ModifierSymbol = 20;
  60. UGC_OtherSymbol = 21;
  61. UGC_SpaceSeparator = 22;
  62. UGC_LineSeparator = 23;
  63. UGC_ParagraphSeparator = 24;
  64. UGC_Control = 25;
  65. UGC_Format = 26;
  66. UGC_Surrogate = 27;
  67. UGC_PrivateUse = 28;
  68. UGC_Unassigned = 29;
  69. type
  70. TUInt24Rec = packed record
  71. public
  72. {$ifdef FPC_LITTLE_ENDIAN}
  73. byte0, byte1, byte2 : Byte;
  74. {$else FPC_LITTLE_ENDIAN}
  75. byte2, byte1, byte0 : Byte;
  76. {$endif FPC_LITTLE_ENDIAN}
  77. public
  78. class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
  79. class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
  80. class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
  81. class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
  82. class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
  83. class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  84. class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  85. class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  86. class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  87. class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  88. class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  89. class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  90. class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  91. class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  92. class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  93. class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  94. class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  95. class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  96. class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  97. class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  98. class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  99. class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  100. class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  101. class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  102. class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  103. class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  104. class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  105. class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  106. class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
  107. end;
  108. UInt24 = TUInt24Rec;
  109. PUInt24 = ^UInt24;
  110. const
  111. ZERO_UINT24 : UInt24 =
  112. {$ifdef FPC_LITTLE_ENDIAN}
  113. (byte0 : 0; byte1 : 0; byte2 : 0;);
  114. {$else FPC_LITTLE_ENDIAN}
  115. (byte2 : 0; byte1 : 0; byte0 : 0;);
  116. {$endif FPC_LITTLE_ENDIAN}
  117. type
  118. EUnicodeException = class(Exception)
  119. end;
  120. PUC_Prop = ^TUC_Prop;
  121. { TUC_Prop }
  122. TUC_Prop = packed record
  123. private
  124. function GetCategory : Byte;inline;
  125. procedure SetCategory(AValue : Byte);
  126. function GetWhiteSpace : Boolean;inline;
  127. procedure SetWhiteSpace(AValue : Boolean);
  128. function GetHangulSyllable : Boolean;inline;
  129. procedure SetHangulSyllable(AValue : Boolean);
  130. function GetNumericValue: Double;inline;
  131. public
  132. CategoryData : Byte;
  133. public
  134. CCC : Byte;
  135. NumericIndex : Byte;
  136. SimpleUpperCase : UInt24;
  137. SimpleLowerCase : UInt24;
  138. DecompositionID : SmallInt;
  139. public
  140. property Category : Byte read GetCategory write SetCategory;
  141. property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
  142. property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
  143. property NumericValue : Double read GetNumericValue;
  144. end;
  145. const
  146. BIT_POS_VALIDE = 0;
  147. type
  148. TWeightLength = 0..24;
  149. TUCA_PropWeights = packed record
  150. Weights : array[0..2] of Word;
  151. end;
  152. PUCA_PropWeights = ^TUCA_PropWeights;
  153. TUCA_PropItemContextRec = packed record
  154. public
  155. CodePointCount : Byte;
  156. WeightCount : Byte;
  157. public
  158. function GetCodePoints() : PUInt24;inline;
  159. function GetWeights() : PUCA_PropWeights;inline;
  160. end;
  161. PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
  162. PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
  163. TUCA_PropItemContextTreeNodeRec = packed record
  164. public
  165. Left : Word;
  166. Right : Word;
  167. Data : TUCA_PropItemContextRec;
  168. public
  169. function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
  170. function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
  171. end;
  172. { TUCA_PropItemContextTreeRec }
  173. TUCA_PropItemContextTreeRec = packed record
  174. public
  175. Size : UInt24;
  176. public
  177. function GetData:PUCA_PropItemContextTreeNodeRec;inline;
  178. property Data : PUCA_PropItemContextTreeNodeRec read GetData;
  179. function Find(
  180. const AChars : PUInt24;
  181. const ACharCount : Integer;
  182. out ANode : PUCA_PropItemContextTreeNodeRec
  183. ) : Boolean;
  184. end;
  185. PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
  186. { TUCA_PropItemRec }
  187. TUCA_PropItemRec = packed record
  188. private
  189. const FLAG_CODEPOINT = 1;
  190. const FLAG_CONTEXTUAL = 2;
  191. const FLAG_DELETION = 3;
  192. private
  193. function GetWeightLength: TWeightLength;inline;
  194. //procedure SetWeightLength(AValue: TWeightLength);inline;
  195. function GetCodePoint() : UInt24;inline;
  196. public
  197. Valid : Byte;// On First Bit
  198. ChildCount : Byte;
  199. Size : Word;
  200. Flags : Byte;
  201. public
  202. function HasCodePoint() : Boolean;inline;
  203. property CodePoint : UInt24 read GetCodePoint;
  204. //WeightLength is stored in the 5 last bits of "Valid"
  205. property WeightLength : TWeightLength read GetWeightLength;// write SetWeightLength;
  206. //Weights : array[0..WeightLength] of TUCA_PropWeights;
  207. function IsValid() : Boolean;inline;
  208. //function GetWeightArray() : PUCA_PropWeights;inline;
  209. procedure GetWeightArray(ADest : PUCA_PropWeights);
  210. function GetSelfOnlySize() : Word;inline;
  211. function GetContextual() : Boolean;inline;
  212. property Contextual : Boolean read GetContextual;
  213. function GetContext() : PUCA_PropItemContextTreeRec;
  214. function IsDeleted() : Boolean;inline;
  215. end;
  216. PUCA_PropItemRec = ^TUCA_PropItemRec;
  217. TUCA_VariableKind = (
  218. ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
  219. ucaIgnoreSP // This one is not implemented !
  220. );
  221. TCollationName = string[128];
  222. PUCA_DataBook = ^TUCA_DataBook;
  223. TUCA_DataBook = packed record
  224. public
  225. Base : PUCA_DataBook;
  226. Version : TCollationName;
  227. CollationName : TCollationName;
  228. VariableWeight : TUCA_VariableKind;
  229. Backwards : array[0..3] of Boolean;
  230. BMP_Table1 : PByte;
  231. BMP_Table2 : PUInt24;
  232. OBMP_Table1 : PWord;
  233. OBMP_Table2 : PUInt24;
  234. PropCount : Integer;
  235. Props : PUCA_PropItemRec;
  236. VariableLowLimit : Word;
  237. VariableHighLimit : Word;
  238. public
  239. function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
  240. end;
  241. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  242. TCollationFields = set of TCollationField;
  243. const
  244. ROOT_COLLATION_NAME = 'DUCET';
  245. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  246. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  247. function UnicodeIsSurrogatePair(
  248. const AHighSurrogate,
  249. ALowSurrogate : UnicodeChar
  250. ) : Boolean;inline;
  251. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  252. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
  253. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  254. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  255. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
  256. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
  257. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
  258. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
  259. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
  260. procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
  261. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
  262. type
  263. TUCASortKeyItem = Word;
  264. TUCASortKey = array of TUCASortKeyItem;
  265. function ComputeSortKey(
  266. const AString : UnicodeString;
  267. const ACollation : PUCA_DataBook
  268. ) : TUCASortKey;inline;overload;
  269. function ComputeSortKey(
  270. const AStr : PUnicodeChar;
  271. const ALength : SizeInt;
  272. const ACollation : PUCA_DataBook
  273. ) : TUCASortKey;overload;
  274. function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
  275. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
  276. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  277. function UnregisterCollation(const AName : ansistring): Boolean;
  278. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  279. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  280. function GetCollationCount() : Integer;
  281. procedure PrepareCollation(
  282. ACollation : PUCA_DataBook;
  283. const ABaseName : ansistring;
  284. const AChangedFields : TCollationFields
  285. );
  286. resourcestring
  287. SCollationNotFound = 'Collation not found : "%s".';
  288. implementation
  289. uses
  290. unicodenumtable;
  291. type
  292. TCardinalRec = packed record
  293. {$ifdef FPC_LITTLE_ENDIAN}
  294. byte0, byte1, byte2, byte3 : Byte;
  295. {$else FPC_LITTLE_ENDIAN}
  296. byte3, byte2, byte1, byte0 : Byte;
  297. {$endif FPC_LITTLE_ENDIAN}
  298. end;
  299. TWordRec = packed record
  300. {$ifdef FPC_LITTLE_ENDIAN}
  301. byte0, byte1 : Byte;
  302. {$else FPC_LITTLE_ENDIAN}
  303. byte1, byte0 : Byte;
  304. {$endif FPC_LITTLE_ENDIAN}
  305. end;
  306. { TUInt24Rec }
  307. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
  308. begin
  309. TCardinalRec(Result).byte0 := a.byte0;
  310. TCardinalRec(Result).byte1 := a.byte1;
  311. TCardinalRec(Result).byte2 := a.byte2;
  312. TCardinalRec(Result).byte3 := 0;
  313. end;
  314. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
  315. begin
  316. Result := Cardinal(a);
  317. end;
  318. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
  319. begin
  320. {$IFOPT R+}
  321. if (a > $FFFF) then
  322. Error(reIntOverflow);
  323. {$ENDIF R+}
  324. TWordRec(Result).byte0 := a.byte0;
  325. TWordRec(Result).byte1 := a.byte1;
  326. end;
  327. class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
  328. begin
  329. {$IFOPT R+}
  330. if (a > $FF) then
  331. Error(reIntOverflow);
  332. {$ENDIF R+}
  333. Result := a.byte0;
  334. end;
  335. class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
  336. begin
  337. {$IFOPT R+}
  338. if (a > $FFFFFF) then
  339. Error(reIntOverflow);
  340. {$ENDIF R+}
  341. Result.byte0 := TCardinalRec(a).byte0;
  342. Result.byte1 := TCardinalRec(a).byte1;
  343. Result.byte2 := TCardinalRec(a).byte2;
  344. end;
  345. class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
  346. begin
  347. Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
  348. end;
  349. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
  350. begin
  351. Result := (TCardinalRec(b).byte3 = 0) and
  352. (a.byte0 = TCardinalRec(b).byte0) and
  353. (a.byte1 = TCardinalRec(b).byte1) and
  354. (a.byte2 = TCardinalRec(b).byte2);
  355. end;
  356. class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
  357. begin
  358. Result := (b = a);
  359. end;
  360. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
  361. begin
  362. Result := (LongInt(a) = b);
  363. end;
  364. class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
  365. begin
  366. Result := (b = a);
  367. end;
  368. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
  369. begin
  370. Result := (a.byte2 = 0) and
  371. (a.byte0 = TWordRec(b).byte0) and
  372. (a.byte1 = TWordRec(b).byte1);
  373. end;
  374. class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
  375. begin
  376. Result := (b = a);
  377. end;
  378. class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
  379. begin
  380. Result := (a.byte2 = 0) and
  381. (a.byte1 = 0) and
  382. (a.byte0 = b);
  383. end;
  384. class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
  385. begin
  386. Result := (b = a);
  387. end;
  388. class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
  389. begin
  390. Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
  391. end;
  392. class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
  393. begin
  394. Result := (TCardinalRec(b).byte3 <> 0) or
  395. (a.byte0 <> TCardinalRec(b).byte0) or
  396. (a.byte1 <> TCardinalRec(b).byte1) or
  397. (a.byte2 <> TCardinalRec(b).byte2);
  398. end;
  399. class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
  400. begin
  401. Result := (b <> a);
  402. end;
  403. class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
  404. begin
  405. Result := (a.byte2 > b.byte2) or
  406. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  407. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
  408. end;
  409. class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
  410. begin
  411. Result := Cardinal(a) > b;
  412. end;
  413. class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
  414. begin
  415. Result := a > Cardinal(b);
  416. end;
  417. class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
  418. begin
  419. Result := (a.byte2 > b.byte2) or
  420. ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
  421. ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
  422. end;
  423. class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  424. begin
  425. Result := Cardinal(a) >= b;
  426. end;
  427. class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  428. begin
  429. Result := a >= Cardinal(b);
  430. end;
  431. class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
  432. begin
  433. Result := (b > a);
  434. end;
  435. class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
  436. begin
  437. Result := Cardinal(a) < b;
  438. end;
  439. class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
  440. begin
  441. Result := a < Cardinal(b);
  442. end;
  443. class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
  444. begin
  445. Result := (b >= a);
  446. end;
  447. class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
  448. begin
  449. Result := Cardinal(a) <= b;
  450. end;
  451. class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
  452. begin
  453. Result := a <= Cardinal(b);
  454. end;
  455. var
  456. CollationTable : array of PUCA_DataBook;
  457. function IndexOfCollation(const AName : string) : Integer;
  458. var
  459. i, c : Integer;
  460. p : Pointer;
  461. begin
  462. c := Length(AName);
  463. p := @AName[1];
  464. for i := 0 to Length(CollationTable) - 1 do begin
  465. if (Length(CollationTable[i]^.CollationName) = c) and
  466. CompareMem(@(CollationTable[i]^.CollationName[1]),p,c)
  467. then
  468. exit(i);
  469. end;
  470. Result := -1;
  471. end;
  472. function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
  473. var
  474. i : Integer;
  475. begin
  476. Result := (IndexOfCollation(ACollation^.CollationName) = -1);
  477. if Result then begin
  478. i := Length(CollationTable);
  479. SetLength(CollationTable,(i+1));
  480. CollationTable[i] := ACollation;
  481. end;
  482. end;
  483. function UnregisterCollation(const AName : ansistring): Boolean;
  484. var
  485. i, c : Integer;
  486. begin
  487. i := IndexOfCollation(AName);
  488. Result := (i >= 0);
  489. if Result then begin
  490. c := Length(CollationTable);
  491. if (c = 1) then begin
  492. SetLength(CollationTable,0);
  493. end else begin
  494. CollationTable[i] := CollationTable[c-1];
  495. SetLength(CollationTable,(c-1));
  496. end;
  497. end;
  498. end;
  499. function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
  500. var
  501. i : Integer;
  502. begin
  503. i := IndexOfCollation(AName);
  504. if (i = -1) then
  505. Result := nil
  506. else
  507. Result := CollationTable[i];
  508. end;
  509. function GetCollationCount() : Integer;
  510. begin
  511. Result := Length(CollationTable);
  512. end;
  513. function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
  514. begin
  515. if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
  516. Result := nil
  517. else
  518. Result := CollationTable[AIndex];
  519. end;
  520. procedure PrepareCollation(
  521. ACollation : PUCA_DataBook;
  522. const ABaseName : ansistring;
  523. const AChangedFields : TCollationFields
  524. );
  525. var
  526. s : ansistring;
  527. p, base : PUCA_DataBook;
  528. begin
  529. if (ABaseName <> '') then
  530. s := ABaseName
  531. else
  532. s := ROOT_COLLATION_NAME;
  533. p := ACollation;
  534. base := FindCollation(s);
  535. if (base = nil) then
  536. raise EUnicodeException.CreateFmt(SCollationNotFound,[s]);
  537. p^.Base := base;
  538. if not(TCollationField.BackWard in AChangedFields) then
  539. p^.Backwards := base^.Backwards;
  540. if not(TCollationField.VariableLowLimit in AChangedFields) then
  541. p^.VariableLowLimit := base^.VariableLowLimit;
  542. if not(TCollationField.VariableHighLimit in AChangedFields) then
  543. p^.VariableLowLimit := base^.VariableHighLimit;
  544. end;
  545. {$INCLUDE unicodedata.inc}
  546. {$IFDEF ENDIAN_LITTLE}
  547. {$INCLUDE unicodedata_le.inc}
  548. {$ENDIF ENDIAN_LITTLE}
  549. {$IFDEF ENDIAN_BIG}
  550. {$INCLUDE unicodedata_be.inc}
  551. {$ENDIF ENDIAN_BIG}
  552. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
  553. begin
  554. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  555. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  556. end;
  557. function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
  558. begin
  559. Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
  560. (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
  561. end;
  562. function UnicodeIsSurrogatePair(
  563. const AHighSurrogate,
  564. ALowSurrogate : UnicodeChar
  565. ) : Boolean;
  566. begin
  567. Result :=
  568. ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
  569. (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
  570. ) and
  571. ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
  572. (Word(ALowSurrogate) <= LOW_SURROGATE_END)
  573. )
  574. end;
  575. function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
  576. begin
  577. Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
  578. (Word(AValue) <= HIGH_SURROGATE_END);
  579. end;
  580. function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
  581. begin
  582. Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
  583. (Word(AValue) <= LOW_SURROGATE_END);
  584. end;
  585. function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
  586. begin
  587. Result:=
  588. @UC_PROP_ARRAY[
  589. UC_TABLE_3[
  590. UC_TABLE_2[UC_TABLE_1[WordRec(ACodePoint).Hi]]
  591. [WordRec(ACodePoint).Lo shr 4]
  592. ][WordRec(ACodePoint).Lo and $F]
  593. ]; {
  594. @UC_PROP_ARRAY[
  595. UC_TABLE_2[
  596. (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
  597. WordRec(ACodePoint).Lo
  598. ]
  599. ];}
  600. end;
  601. function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
  602. begin
  603. Result:=
  604. @UC_PROP_ARRAY[
  605. UCO_TABLE_3[
  606. UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
  607. [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
  608. ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
  609. ]; {
  610. Result:=
  611. @UC_PROP_ARRAY[
  612. UCO_TABLE_2[
  613. (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  614. Word(ALowS) - LOW_SURROGATE_BEGIN
  615. ]
  616. ]; }
  617. end;
  618. function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
  619. var
  620. l, h : UnicodeChar;
  621. begin
  622. if (ACodePoint <= High(Word)) then
  623. exit(GetProps(Word(ACodePoint)));
  624. FromUCS4(ACodePoint,h,l);
  625. Result := GetProps(h,l);
  626. end;
  627. //----------------------------------------------------------------------
  628. function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
  629. const
  630. SBase = $AC00;
  631. LBase = $1100;
  632. VBase = $1161;
  633. TBase = $11A7;
  634. LCount = 19;
  635. VCount = 21;
  636. TCount = 28;
  637. NCount = VCount * TCount; // 588
  638. SCount = LCount * NCount; // 11172
  639. var
  640. SIndex, L, V, T : Integer;
  641. begin
  642. SIndex := AChar - SBase;
  643. if (SIndex < 0) or (SIndex >= SCount) then begin
  644. ABuffer^ := AChar;
  645. exit(1);
  646. end;
  647. L := LBase + SIndex div NCount;
  648. V := VBase + (SIndex mod NCount) div TCount;
  649. T := TBase + SIndex mod TCount;
  650. ABuffer[0] := L;
  651. ABuffer[1] := V;
  652. Result := 2;
  653. if (T <> TBase) then begin
  654. ABuffer[2] := T;
  655. Inc(Result);
  656. end;
  657. end;
  658. function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
  659. var
  660. locStack : array[0..23] of Cardinal;
  661. locStackIdx : Integer;
  662. ResultBuffer : array[0..23] of Cardinal;
  663. ResultIdx : Integer;
  664. procedure AddCompositionToStack(const AIndex : Integer);
  665. var
  666. pdecIdx : ^TDecompositionIndexRec;
  667. k, kc : Integer;
  668. pu : ^UInt24;
  669. begin
  670. pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
  671. pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
  672. kc := pdecIdx^.Length;
  673. Inc(pu,kc);
  674. for k := 1 to kc do begin
  675. Dec(pu);
  676. locStack[locStackIdx + k] := pu^;
  677. end;
  678. locStackIdx := locStackIdx + kc;
  679. end;
  680. procedure AddResult(const AChar : Cardinal);inline;
  681. begin
  682. Inc(ResultIdx);
  683. ResultBuffer[ResultIdx] := AChar;
  684. end;
  685. function PopStack() : Cardinal;inline;
  686. begin
  687. Result := locStack[locStackIdx];
  688. Dec(locStackIdx);
  689. end;
  690. var
  691. cu : Cardinal;
  692. decIdx : SmallInt;
  693. locIsWord : Boolean;
  694. i : Integer;
  695. p : PUnicodeChar;
  696. begin
  697. ResultIdx := -1;
  698. locStackIdx := -1;
  699. AddCompositionToStack(ADecomposeIndex);
  700. while (locStackIdx >= 0) do begin
  701. cu := PopStack();
  702. locIsWord := (cu <= MAX_WORD);
  703. if locIsWord then
  704. decIdx := GetProps(Word(cu))^.DecompositionID
  705. else
  706. decIdx := GetProps(cu)^.DecompositionID;
  707. if (decIdx = -1) then
  708. AddResult(cu)
  709. else
  710. AddCompositionToStack(decIdx);
  711. end;
  712. p := ABuffer;
  713. Result := 0;
  714. for i := 0 to ResultIdx do begin
  715. cu := ResultBuffer[i];
  716. if (cu <= MAX_WORD) then begin
  717. p[0] := UnicodeChar(Word(cu));
  718. Inc(p);
  719. end else begin
  720. FromUCS4(cu,p[0],p[1]);
  721. Inc(p,2);
  722. Inc(Result);
  723. end;
  724. end;
  725. Result := Result + ResultIdx + 1;
  726. end;
  727. procedure CanonicalOrder(var AString : UnicodeString);
  728. begin
  729. CanonicalOrder(@AString[1],Length(AString));
  730. end;
  731. procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
  732. var
  733. i, c : SizeInt;
  734. p, q : PUnicodeChar;
  735. locIsSurrogateP, locIsSurrogateQ : Boolean;
  736. procedure Swap();
  737. var
  738. t, t1 : UnicodeChar;
  739. begin
  740. if not locIsSurrogateP then begin
  741. if not locIsSurrogateQ then begin
  742. t := p^;
  743. p^ := q^;
  744. q^ := t;
  745. exit;
  746. end;
  747. t := p^;
  748. p[0] := q[0];
  749. p[1] := q[1];
  750. q[1] := t;
  751. exit;
  752. end;
  753. if not locIsSurrogateQ then begin
  754. t := q[0];
  755. p[2] := p[1];
  756. p[1] := p[0];
  757. p[0] := t;
  758. exit;
  759. end;
  760. t := p[0];
  761. t1 := p[1];
  762. p[0] := q[0];
  763. p[1] := q[1];
  764. q[0] := t;
  765. q[1] := t1;
  766. end;
  767. var
  768. pu : PUC_Prop;
  769. cccp, cccq : Byte;
  770. begin
  771. c := ALength;
  772. if (c < 2) then
  773. exit;
  774. p := AStr;
  775. i := 1;
  776. while (i < c) do begin
  777. pu := GetProps(Word(p^));
  778. locIsSurrogateP := (pu^.Category = UGC_Surrogate);
  779. if locIsSurrogateP then begin
  780. if (i = (c - 1)) then
  781. Break;
  782. if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
  783. Inc(p);
  784. Inc(i);
  785. Continue;
  786. end;
  787. pu := GetProps(p[0],p[1]);
  788. end;
  789. if (pu^.CCC > 0) then begin
  790. cccp := pu^.CCC;
  791. if locIsSurrogateP then
  792. q := p + 2
  793. else
  794. q := p + 1;
  795. pu := GetProps(Word(q^));
  796. locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
  797. if locIsSurrogateQ then begin
  798. if (i = c) then
  799. Break;
  800. if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
  801. Inc(p);
  802. Inc(i);
  803. Continue;
  804. end;
  805. pu := GetProps(q[0],q[1]);
  806. end;
  807. cccq := pu^.CCC;
  808. if (cccq > 0) and (cccp > cccq) then begin
  809. Swap();
  810. if (i > 1) then begin
  811. Dec(p);
  812. Dec(i);
  813. pu := GetProps(Word(p^));
  814. if (pu^.Category = UGC_Surrogate) then begin
  815. if (i > 1) then begin
  816. Dec(p);
  817. Dec(i);
  818. end;
  819. end;
  820. Continue;
  821. end;
  822. end;
  823. end;
  824. if locIsSurrogateP then begin
  825. Inc(p);
  826. Inc(i);
  827. end;
  828. Inc(p);
  829. Inc(i);
  830. end;
  831. end;
  832. //Canonical Decomposition
  833. function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
  834. begin
  835. Result := NormalizeNFD(@AString[1],Length(AString));
  836. end;
  837. function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
  838. const MAX_EXPAND = 3;
  839. var
  840. i, c, kc, k : SizeInt;
  841. pp, pr : PUnicodeChar;
  842. pu : PUC_Prop;
  843. locIsSurrogate : Boolean;
  844. cpArray : array[0..7] of Cardinal;
  845. cp : Cardinal;
  846. begin
  847. c := ALength;
  848. SetLength(Result,(MAX_EXPAND*c));
  849. if (c > 0) then begin
  850. pp := AStr;
  851. pr := @Result[1];
  852. i := 1;
  853. while (i <= c) do begin
  854. pu := GetProps(Word(pp^));
  855. locIsSurrogate := (pu^.Category = UGC_Surrogate);
  856. if locIsSurrogate then begin
  857. if (i = c) then
  858. Break;
  859. if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
  860. pr^ := pp^;
  861. Inc(pp);
  862. Inc(pr);
  863. Inc(i);
  864. Continue;
  865. end;
  866. pu := GetProps(pp[0],pp[1]);
  867. end;
  868. if pu^.HangulSyllable then begin
  869. if locIsSurrogate then begin
  870. cp := ToUCS4(pp[0],pp[1]);
  871. Inc(pp);
  872. Inc(i);
  873. end else begin
  874. cp := Word(pp^);
  875. end;
  876. kc := DecomposeHangul(cp,@cpArray[0]);
  877. for k := 0 to kc - 1 do begin
  878. if (cpArray[k] <= MAX_WORD) then begin
  879. pr^ := UnicodeChar(Word(cpArray[k]));
  880. pr := pr + 1;
  881. end else begin
  882. FromUCS4(cpArray[k],pr[0],pr[1]);
  883. pr := pr + 2;
  884. end;
  885. end;
  886. if (kc > 0) then
  887. Dec(pr);
  888. end else begin
  889. if (pu^.DecompositionID = -1) then begin
  890. pr^ := pp^;
  891. if locIsSurrogate then begin
  892. Inc(pp);
  893. Inc(pr);
  894. Inc(i);
  895. pr^ := pp^;
  896. end;
  897. end else begin
  898. k := Decompose(pu^.DecompositionID,pr);
  899. pr := pr + (k - 1);
  900. if locIsSurrogate then begin
  901. Inc(pp);
  902. Inc(i);
  903. end;
  904. end;
  905. end;
  906. Inc(pp);
  907. Inc(pr);
  908. Inc(i);
  909. end;
  910. Dec(pp);
  911. i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
  912. SetLength(Result,i);
  913. CanonicalOrder(@Result[1],Length(Result));
  914. end;
  915. end;
  916. type
  917. TBitOrder = 0..7;
  918. function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
  919. begin
  920. Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
  921. end;
  922. procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
  923. begin
  924. if AValue then
  925. AData := AData or (1 shl (ABit mod 8))
  926. else
  927. AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
  928. end;
  929. { TUCA_PropItemContextTreeNodeRec }
  930. function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
  931. begin
  932. if (Self.Left = 0) then
  933. Result := nil
  934. else
  935. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
  936. end;
  937. function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
  938. begin
  939. if (Self.Right = 0) then
  940. Result := nil
  941. else
  942. Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
  943. end;
  944. { TUCA_PropItemContextRec }
  945. function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
  946. begin
  947. Result := PUInt24(
  948. PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
  949. SizeOf(Self.WeightCount)
  950. );
  951. end;
  952. function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
  953. begin
  954. Result := PUCA_PropWeights(
  955. PtrUInt(@Self) +
  956. SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
  957. (Self.CodePointCount*SizeOf(UInt24))
  958. );
  959. end;
  960. { TUCA_PropItemContextTreeRec }
  961. function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
  962. begin
  963. if (Size = 0) then
  964. Result := nil
  965. else
  966. Result := PUCA_PropItemContextTreeNodeRec(
  967. PtrUInt(
  968. PtrUInt(@Self) + SizeOf(UInt24){Size}
  969. )
  970. );
  971. end;
  972. function CompareCodePoints(
  973. A : PUInt24; LA : Integer;
  974. B : PUInt24; LB : Integer
  975. ) : Integer;
  976. var
  977. i, hb : Integer;
  978. begin
  979. if (A = B) then
  980. exit(0);
  981. Result := 1;
  982. hb := LB - 1;
  983. for i := 0 to LA - 1 do begin
  984. if (i > hb) then
  985. exit;
  986. if (A[i] < B[i]) then
  987. exit(-1);
  988. if (A[i] > B[i]) then
  989. exit(1);
  990. end;
  991. if (LA = LB) then
  992. exit(0);
  993. exit(-1);
  994. end;
  995. function TUCA_PropItemContextTreeRec.Find(
  996. const AChars : PUInt24;
  997. const ACharCount : Integer;
  998. out ANode : PUCA_PropItemContextTreeNodeRec
  999. ) : Boolean;
  1000. var
  1001. t : PUCA_PropItemContextTreeNodeRec;
  1002. begin
  1003. t := Data;
  1004. while (t <> nil) do begin
  1005. case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
  1006. 0 : Break;
  1007. -1 : t := t^.GetLeftNode();
  1008. else
  1009. t := t^.GetRightNode();
  1010. end;
  1011. end;
  1012. Result := (t <> nil);
  1013. if Result then
  1014. ANode := t;
  1015. end;
  1016. { TUC_Prop }
  1017. function TUC_Prop.GetCategory: Byte;
  1018. begin
  1019. Result := Byte((CategoryData and Byte($F8)) shr 3);
  1020. end;
  1021. function TUC_Prop.GetNumericValue: Double;
  1022. begin
  1023. Result := UC_NUMERIC_ARRAY[NumericIndex];
  1024. end;
  1025. procedure TUC_Prop.SetCategory(AValue: Byte);
  1026. begin
  1027. CategoryData := Byte(CategoryData or Byte(AValue shl 3));
  1028. end;
  1029. function TUC_Prop.GetWhiteSpace: Boolean;
  1030. begin
  1031. Result := IsBitON(CategoryData,0);
  1032. end;
  1033. procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
  1034. begin
  1035. SetBit(CategoryData,0,AValue);
  1036. end;
  1037. function TUC_Prop.GetHangulSyllable: Boolean;
  1038. begin
  1039. Result := IsBitON(CategoryData,1);
  1040. end;
  1041. procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
  1042. begin
  1043. SetBit(CategoryData,1,AValue);
  1044. end;
  1045. { TUCA_DataBook }
  1046. function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
  1047. begin
  1048. Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
  1049. (AWeight^.Weights[0] <= Self.VariableHighLimit);
  1050. end;
  1051. { TUCA_PropItemRec }
  1052. function TUCA_PropItemRec.GetWeightLength: TWeightLength;
  1053. begin
  1054. Result := TWeightLength(Valid and Byte($F8) shr 3);
  1055. end;
  1056. function TUCA_PropItemRec.GetCodePoint() : UInt24;
  1057. begin
  1058. if HasCodePoint() then begin
  1059. if Contextual then
  1060. Result := PUInt24(
  1061. PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
  1062. Word(GetContext()^.Size)
  1063. )^
  1064. else
  1065. Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
  1066. end else begin
  1067. {$ifdef uni_debug}
  1068. raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
  1069. {$else uni_debug}
  1070. Result := ZERO_UINT24;
  1071. {$endif uni_debug}
  1072. end
  1073. end;
  1074. function TUCA_PropItemRec.HasCodePoint() : Boolean;
  1075. begin
  1076. Result := IsBitON(Flags,FLAG_CODEPOINT);
  1077. end;
  1078. {procedure TUCA_PropItemRec.SetWeightLength(AValue: TWeightLength);
  1079. begin
  1080. Valid := Valid or Byte(Byte(AValue) shl 3);
  1081. end;}
  1082. function TUCA_PropItemRec.IsValid() : Boolean;
  1083. begin
  1084. Result := IsBitON(Valid,BIT_POS_VALIDE);
  1085. end;
  1086. {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
  1087. begin
  1088. Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1089. end;}
  1090. procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
  1091. var
  1092. i, c : Integer;
  1093. p : PByte;
  1094. pd : PUCA_PropWeights;
  1095. begin
  1096. c := WeightLength;
  1097. p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
  1098. pd := ADest;
  1099. pd^.Weights[0] := PWord(p)^;
  1100. p := p + 2;
  1101. if IsBitON(Self.Valid,(BIT_POS_VALIDE+1)) then begin
  1102. pd^.Weights[1] := PWord(p)^;
  1103. p := p + 2;
  1104. end else begin
  1105. pd^.Weights[1] := p^;
  1106. p := p + 1;
  1107. end;
  1108. if IsBitON(Self.Valid,(BIT_POS_VALIDE+2)) then begin
  1109. pd^.Weights[2] := PWord(p)^;
  1110. p := p + 2;
  1111. end else begin
  1112. pd^.Weights[2] := p^;
  1113. p := p + 1;
  1114. end;
  1115. if (c > 1) then
  1116. Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
  1117. end;
  1118. function TUCA_PropItemRec.GetSelfOnlySize() : Word;
  1119. begin
  1120. Result := SizeOf(TUCA_PropItemRec);
  1121. if (WeightLength > 0) then begin
  1122. Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
  1123. if not IsBitON(Self.Valid,(BIT_POS_VALIDE+1)) then
  1124. Result := Result - 1;
  1125. if not IsBitON(Self.Valid,(BIT_POS_VALIDE+2)) then
  1126. Result := Result - 1;
  1127. end;
  1128. if HasCodePoint() then
  1129. Result := Result + SizeOf(UInt24);
  1130. if Contextual then
  1131. Result := Result + Word(GetContext()^.Size);
  1132. end;
  1133. function TUCA_PropItemRec.GetContextual: Boolean;
  1134. begin
  1135. Result := IsBitON(Flags,FLAG_CONTEXTUAL);
  1136. end;
  1137. function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
  1138. var
  1139. p : PtrUInt;
  1140. begin
  1141. if not Contextual then
  1142. exit(nil);
  1143. p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
  1144. if IsBitON(Flags,FLAG_CODEPOINT) then
  1145. p := p + SizeOf(UInt24);
  1146. Result := PUCA_PropItemContextTreeRec(p);
  1147. end;
  1148. function TUCA_PropItemRec.IsDeleted() : Boolean;
  1149. begin
  1150. Result := IsBitON(Flags,FLAG_DELETION);
  1151. end;
  1152. function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
  1153. var
  1154. i : Cardinal;
  1155. begin
  1156. if (ABook^.BMP_Table2 = nil) then
  1157. exit(nil);
  1158. i := ABook^.BMP_Table2[
  1159. (ABook^.BMP_Table1[WordRec(AChar).Hi] * 256) +
  1160. WordRec(AChar).Lo
  1161. ];
  1162. if (i > 0) then
  1163. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1164. else
  1165. Result := nil;
  1166. end;
  1167. function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
  1168. var
  1169. i : Cardinal;
  1170. begin
  1171. if (ABook^.OBMP_Table2 = nil) then
  1172. exit(nil);
  1173. i := ABook^.OBMP_Table2[
  1174. (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
  1175. Word(ALowS) - LOW_SURROGATE_BEGIN
  1176. ];
  1177. if (i > 0) then
  1178. Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
  1179. else
  1180. Result := nil;
  1181. end;
  1182. {$include weight_derivation.inc}
  1183. function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
  1184. var
  1185. bb : TUCASortKey;
  1186. begin
  1187. SetLength(bb,Length(B));
  1188. if (Length(bb) > 0) then
  1189. Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
  1190. Result := CompareSortKey(A,bb);
  1191. end;
  1192. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  1193. var
  1194. i, hb : Integer;
  1195. begin
  1196. if (Pointer(A) = Pointer(B)) then
  1197. exit(0);
  1198. Result := 1;
  1199. hb := Length(B) - 1;
  1200. for i := 0 to Length(A) - 1 do begin
  1201. if (i > hb) then
  1202. exit;
  1203. if (A[i] < B[i]) then
  1204. exit(-1);
  1205. if (A[i] > B[i]) then
  1206. exit(1);
  1207. end;
  1208. if (Length(A) = Length(B)) then
  1209. exit(0);
  1210. exit(-1);
  1211. end;
  1212. type
  1213. TUCA_PropWeightsArray = array of TUCA_PropWeights;
  1214. function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1215. var
  1216. r : TUCASortKey;
  1217. i, c, k, ral, levelCount : Integer;
  1218. pce : PUCA_PropWeights;
  1219. begin
  1220. c := Length(ACEList);
  1221. if (c = 0) then
  1222. exit(nil);
  1223. SetLength(r,((3+1{Level Separator})*c)); //SetLength(r,(3*c));
  1224. ral := 0;
  1225. levelCount := Length(ACEList[0].Weights);
  1226. for i := 0 to levelCount - 1 do begin
  1227. if not ACollation^.Backwards[i] then begin
  1228. pce := @ACEList[0];
  1229. for k := 0 to c - 1 do begin
  1230. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1231. r[ral] := pce^.Weights[i];
  1232. ral := ral + 1;
  1233. end;
  1234. pce := pce + 1;
  1235. end;
  1236. end else begin
  1237. pce := @ACEList[c-1];
  1238. for k := 0 to c - 1 do begin
  1239. if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
  1240. r[ral] := pce^.Weights[i];
  1241. ral := ral + 1;
  1242. end;
  1243. pce := pce - 1;
  1244. end;
  1245. end;
  1246. r[ral] := 0;
  1247. ral := ral + 1;
  1248. end;
  1249. ral := ral - 1;
  1250. SetLength(r,ral);
  1251. Result := r;
  1252. end;
  1253. function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1254. var
  1255. r : TUCASortKey;
  1256. i, c, k, ral, levelCount : Integer;
  1257. pce : PUCA_PropWeights;
  1258. begin
  1259. c := Length(ACEList);
  1260. if (c = 0) then
  1261. exit(nil);
  1262. SetLength(r,((3+1{Level Separator})*c)); //SetLength(r,(3*c));
  1263. ral := 0;
  1264. levelCount := Length(ACEList[0].Weights);
  1265. for i := 0 to levelCount - 1 do begin
  1266. if not ACollation^.Backwards[i] then begin
  1267. pce := @ACEList[0];
  1268. for k := 0 to c - 1 do begin
  1269. if (pce^.Weights[i] <> 0) then begin
  1270. r[ral] := pce^.Weights[i];
  1271. ral := ral + 1;
  1272. end;
  1273. pce := pce + 1;
  1274. end;
  1275. end else begin
  1276. pce := @ACEList[c-1];
  1277. for k := 0 to c - 1 do begin
  1278. if (pce^.Weights[i] <> 0) then begin
  1279. r[ral] := pce^.Weights[i];
  1280. ral := ral + 1;
  1281. end;
  1282. pce := pce - 1;
  1283. end;
  1284. end;
  1285. r[ral] := 0;
  1286. ral := ral + 1;
  1287. end;
  1288. ral := ral - 1;
  1289. SetLength(r,ral);
  1290. Result := r;
  1291. end;
  1292. function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
  1293. var
  1294. r : TUCASortKey;
  1295. i, c, k, ral, levelCount : Integer;
  1296. pce : PUCA_PropWeights;
  1297. variableState : Boolean;
  1298. begin
  1299. c := Length(ACEList);
  1300. if (c = 0) then
  1301. exit(nil);
  1302. SetLength(r,((3+1{Level Separator})*c)); //SetLength(r,(3*c));
  1303. ral := 0;
  1304. levelCount := Length(ACEList[0].Weights);
  1305. for i := 0 to levelCount - 1 do begin
  1306. if not ACollation^.Backwards[i] then begin
  1307. variableState := False;
  1308. pce := @ACEList[0];
  1309. for k := 0 to c - 1 do begin
  1310. if not ACollation^.IsVariable(pce) then begin
  1311. if (pce^.Weights[0] <> 0) then
  1312. variableState := False;
  1313. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1314. r[ral] := pce^.Weights[i];
  1315. ral := ral + 1;
  1316. end;
  1317. end else begin
  1318. variableState := True;
  1319. end;
  1320. pce := pce + 1;
  1321. end;
  1322. end else begin
  1323. pce := @ACEList[c-1];
  1324. for k := 0 to c - 1 do begin
  1325. if not ACollation^.IsVariable(pce) then begin
  1326. if (pce^.Weights[0] <> 0) then
  1327. variableState := False;
  1328. if (pce^.Weights[i] <> 0) and not(variableState) then begin
  1329. r[ral] := pce^.Weights[i];
  1330. ral := ral + 1;
  1331. end;
  1332. end else begin
  1333. variableState := True;
  1334. end;
  1335. pce := pce - 1;
  1336. end;
  1337. end;
  1338. r[ral] := 0;
  1339. ral := ral + 1;
  1340. end;
  1341. ral := ral - 1;
  1342. //SetLength(r,ral);
  1343. //Result := r;
  1344. SetLength(Result,ral);
  1345. Move(r[0],Result[0],(ral*SizeOf(r[0])));
  1346. end;
  1347. function FormKeyShiftedTrimmed(
  1348. const ACEList : TUCA_PropWeightsArray;
  1349. const ACollation : PUCA_DataBook
  1350. ) : TUCASortKey;
  1351. var
  1352. i : Integer;
  1353. p : ^TUCASortKeyItem;
  1354. begin
  1355. Result := FormKeyShifted(ACEList,ACollation);
  1356. i := Length(Result) - 1;
  1357. if (i >= 0) then begin
  1358. p := @Result[i];
  1359. while (i >= 0) do begin
  1360. if (p^ <> $FFFF) then
  1361. Break;
  1362. Dec(i);
  1363. Dec(p);
  1364. end;
  1365. if ((i+1) < Length(Result)) then
  1366. SetLength(Result,(i+1));
  1367. end;
  1368. end;
  1369. function FindChild(
  1370. const ACodePoint : Cardinal;
  1371. const AParent : PUCA_PropItemRec
  1372. ) : PUCA_PropItemRec;inline;
  1373. var
  1374. k : Integer;
  1375. begin
  1376. Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
  1377. for k := 0 to AParent^.ChildCount - 1 do begin
  1378. if (ACodePoint = Result^.CodePoint) then
  1379. exit;
  1380. Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
  1381. end;
  1382. Result := nil;
  1383. end;
  1384. function ComputeSortKey(
  1385. const AString : UnicodeString;
  1386. const ACollation : PUCA_DataBook
  1387. ) : TUCASortKey;
  1388. begin
  1389. Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
  1390. end;
  1391. function ComputeSortKeyOLD(
  1392. const AStr : PUnicodeChar;
  1393. const ALength : SizeInt;
  1394. const ACollation : PUCA_DataBook
  1395. ) : TUCASortKey;
  1396. var
  1397. r : TUCA_PropWeightsArray;
  1398. ral {used length of "r"}: Integer;
  1399. rl {capacity of "r"} : Integer;
  1400. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1401. begin
  1402. if (rl < AMinGrow) then
  1403. rl := rl + AMinGrow
  1404. else
  1405. rl := 2 * rl;
  1406. SetLength(r,rl);
  1407. end;
  1408. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1409. begin
  1410. if ((ral + AItem^.WeightLength) > rl) then
  1411. GrowKey(AItem^.WeightLength);
  1412. AItem^.GetWeightArray(@r[ral]);
  1413. ral := ral + AItem^.WeightLength;
  1414. end;
  1415. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1416. begin
  1417. if ((ral + 2) > rl) then
  1418. GrowKey();
  1419. DeriveWeight(ACodePoint,@r[ral]);
  1420. ral := ral + 2;
  1421. end;
  1422. var
  1423. i : Integer;
  1424. s : UnicodeString;
  1425. ps : PUnicodeChar;
  1426. cp : Cardinal;
  1427. pp : PUCA_PropItemRec;
  1428. ppLevel : Byte;
  1429. removedCharIndex : array of DWord;
  1430. removedCharIndexLength : DWord;
  1431. locHistory : array[0..24] of record
  1432. i : Integer;
  1433. pp : PUCA_PropItemRec;
  1434. ppLevel : Byte;
  1435. cp : Cardinal;
  1436. removedCharIndexLength : DWord;
  1437. end;
  1438. locHistoryTop : Integer;
  1439. procedure RecordStep();inline;
  1440. begin
  1441. Inc(locHistoryTop);
  1442. locHistory[locHistoryTop].i := i;
  1443. locHistory[locHistoryTop].pp := pp;
  1444. locHistory[locHistoryTop].ppLevel := ppLevel;
  1445. locHistory[locHistoryTop].cp := cp;
  1446. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1447. end;
  1448. procedure ClearHistory();inline;
  1449. begin
  1450. locHistoryTop := -1;
  1451. end;
  1452. function HasHistory() : Boolean;inline;
  1453. begin
  1454. Result := (locHistoryTop >= 0);
  1455. end;
  1456. procedure GoBack();inline;
  1457. begin
  1458. i := locHistory[locHistoryTop].i;
  1459. cp := locHistory[locHistoryTop].cp;
  1460. pp := locHistory[locHistoryTop].pp;
  1461. ppLevel := locHistory[locHistoryTop].ppLevel;
  1462. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1463. ps := @s[i];
  1464. Dec(locHistoryTop);
  1465. end;
  1466. var
  1467. c : Integer;
  1468. lastUnblockedNonstarterCCC : Byte;
  1469. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1470. var
  1471. k : DWord;
  1472. pk : PUnicodeChar;
  1473. puk : PUC_Prop;
  1474. begin
  1475. k := AStartFrom;
  1476. if (k > c) then
  1477. exit(False);
  1478. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0) then
  1479. exit(False);
  1480. {if (k = (i+1)) or
  1481. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1482. then
  1483. lastUnblockedNonstarterCCC := 0;}
  1484. pk := @s[k];
  1485. if UnicodeIsHighSurrogate(pk^) then begin
  1486. if (k = c) then
  1487. exit(False);
  1488. if UnicodeIsLowSurrogate(pk[1]) then
  1489. puk := GetProps(pk[0],pk[1])
  1490. else
  1491. puk := GetProps(Word(pk^));
  1492. end else begin
  1493. puk := GetProps(Word(pk^));
  1494. end;
  1495. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  1496. exit(False);
  1497. lastUnblockedNonstarterCCC := puk^.CCC;
  1498. Result := True;
  1499. end;
  1500. procedure RemoveChar(APos : Integer);inline;
  1501. begin
  1502. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1503. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1504. removedCharIndex[removedCharIndexLength] := APos;
  1505. Inc(removedCharIndexLength);
  1506. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  1507. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1508. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1509. removedCharIndex[removedCharIndexLength] := APos+1;
  1510. Inc(removedCharIndexLength);
  1511. end;
  1512. end;
  1513. procedure Inc_I();
  1514. begin
  1515. if (removedCharIndexLength = 0) then begin
  1516. Inc(i);
  1517. Inc(ps);
  1518. exit;
  1519. end;
  1520. while True do begin
  1521. Inc(i);
  1522. Inc(ps);
  1523. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  1524. Break;
  1525. end;
  1526. end;
  1527. var
  1528. k : Integer;
  1529. pp1 : PUCA_PropItemRec;
  1530. locIsSurrogate, ok : Boolean;
  1531. pu : PUC_Prop;
  1532. begin
  1533. if (ALength = 0) then
  1534. exit(nil);
  1535. c := ALength;
  1536. s := NormalizeNFD(AStr,c);
  1537. c := Length(s);
  1538. rl := 3*c;
  1539. SetLength(r,rl);
  1540. ral := 0;
  1541. ps := @s[1];
  1542. pp := nil;
  1543. ppLevel := 0;
  1544. locHistoryTop := -1;
  1545. removedCharIndexLength := 0;
  1546. i := 1;
  1547. while (i <= c) do begin
  1548. if UnicodeIsHighSurrogate(ps[0]) then begin
  1549. if (i = c) then
  1550. Break;
  1551. if UnicodeIsLowSurrogate(ps[1]) then begin
  1552. locIsSurrogate := True;
  1553. cp := ToUCS4(ps[0],ps[1]);
  1554. end else begin
  1555. locIsSurrogate := False;
  1556. cp := Word(ps[0]);
  1557. end;
  1558. end else begin
  1559. locIsSurrogate := False;
  1560. cp := Word(ps[0]);
  1561. end;
  1562. if (pp = nil) then begin // Start Matching
  1563. ppLevel := 0;
  1564. if locIsSurrogate then
  1565. pp := GetPropUCA(ps[0],ps[1],ACollation)
  1566. else
  1567. pp := GetPropUCA(ps[0],ACollation);
  1568. if (pp = nil) then begin
  1569. AddComputedWeights(cp);
  1570. ClearHistory();
  1571. end else begin
  1572. if (pp^.ChildCount = 0) or
  1573. (pp^.IsValid() and (i = c))
  1574. then begin
  1575. AddWeights(pp);
  1576. ClearHistory();
  1577. pp := nil;
  1578. end else begin
  1579. RecordStep();
  1580. end;
  1581. end;
  1582. end else begin
  1583. ok := False;
  1584. pp1 := PUCA_PropItemRec(PtrUInt(pp) + pp^.GetSelfOnlySize());
  1585. for k := 0 to pp^.ChildCount - 1 do begin
  1586. if (cp = pp1^.CodePoint) then begin
  1587. ok := True;
  1588. Break;
  1589. end;
  1590. pp1 := PUCA_PropItemRec(PtrUInt(pp1) + pp1^.Size);
  1591. end;
  1592. if not ok then begin
  1593. // permutations !
  1594. pu := GetProps(cp);
  1595. if (pu^.CCC > 0) then begin
  1596. lastUnblockedNonstarterCCC := pu^.CCC;
  1597. if locIsSurrogate then
  1598. k := i + 2
  1599. else
  1600. k := i + 1;
  1601. while IsUnblockedNonstarter(k) do begin
  1602. ok := UnicodeIsHighSurrogate(s[k]) and (k<c) and UnicodeIsLowSurrogate(s[k+1]);
  1603. if ok then
  1604. pp1 := FindChild(ToUCS4(s[k],s[k+1]),pp)
  1605. else
  1606. pp1 := FindChild(Word(s[k]),pp);
  1607. if (pp1 <> nil) then begin
  1608. pp := pp1;
  1609. RemoveChar(k);
  1610. Inc(ppLevel);
  1611. RecordStep();
  1612. if (pp^.ChildCount = 0 ) then
  1613. Break;
  1614. end;
  1615. if ok then
  1616. Inc(k);
  1617. Inc(k);
  1618. end;
  1619. end;
  1620. if pp^.IsValid() then begin
  1621. AddWeights(pp);
  1622. //GoBack();
  1623. ClearHistory();
  1624. pp := nil;
  1625. ppLevel := 0;
  1626. Continue;
  1627. end else begin
  1628. //walk back
  1629. ok := False;
  1630. while HasHistory() do begin
  1631. GoBack();
  1632. if pp^.IsValid() then begin
  1633. AddWeights(pp);
  1634. ClearHistory();
  1635. pp := nil;
  1636. ppLevel := 0;
  1637. ok := True;
  1638. Break;
  1639. end;
  1640. end;
  1641. if ok then begin
  1642. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  1643. Inc(i);
  1644. Inc(ps);
  1645. end;
  1646. Inc_I();
  1647. Continue;
  1648. end;
  1649. if (pp <> nil) then
  1650. AddComputedWeights(cp);
  1651. end;
  1652. end else begin
  1653. pp := pp1;
  1654. if (pp^.ChildCount = 0) then begin
  1655. AddWeights(pp);
  1656. ClearHistory();
  1657. pp := nil;
  1658. ppLevel := 0;
  1659. end else begin
  1660. Inc(ppLevel);
  1661. RecordStep();
  1662. end;
  1663. end;
  1664. end;
  1665. if locIsSurrogate then begin
  1666. Inc(ps);
  1667. Inc(i);
  1668. end;
  1669. //
  1670. Inc_I();
  1671. end;
  1672. SetLength(r,ral);
  1673. case ACollation^.VariableWeight of
  1674. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(r,ACollation);
  1675. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(r,ACollation);
  1676. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(r,ACollation);
  1677. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(r,ACollation);
  1678. else
  1679. Result := FormKeyShifted(r,ACollation);
  1680. end;
  1681. end;
  1682. //--------------------------------------------------------------------------
  1683. function ComputeRawSortKey(
  1684. const AStr : PUnicodeChar;
  1685. const ALength : SizeInt;
  1686. const ACollation : PUCA_DataBook
  1687. ) : TUCA_PropWeightsArray;
  1688. var
  1689. r : TUCA_PropWeightsArray;
  1690. ral {used length of "r"}: Integer;
  1691. rl {capacity of "r"} : Integer;
  1692. procedure GrowKey(const AMinGrow : Integer = 0);inline;
  1693. begin
  1694. if (rl < AMinGrow) then
  1695. rl := rl + AMinGrow
  1696. else
  1697. rl := 2 * rl;
  1698. SetLength(r,rl);
  1699. end;
  1700. var
  1701. i : Integer;
  1702. s : UnicodeString;
  1703. ps : PUnicodeChar;
  1704. cp : Cardinal;
  1705. cl : PUCA_DataBook;
  1706. pp : PUCA_PropItemRec;
  1707. ppLevel : Byte;
  1708. removedCharIndex : array of DWord;
  1709. removedCharIndexLength : DWord;
  1710. locHistory : array[0..24] of record
  1711. i : Integer;
  1712. cl : PUCA_DataBook;
  1713. pp : PUCA_PropItemRec;
  1714. ppLevel : Byte;
  1715. cp : Cardinal;
  1716. removedCharIndexLength : DWord;
  1717. end;
  1718. locHistoryTop : Integer;
  1719. suppressState : record
  1720. cl : PUCA_DataBook;
  1721. CharCount : Integer;
  1722. end;
  1723. LastKeyOwner : record
  1724. Length : Integer;
  1725. Chars : array[0..24] of UInt24;
  1726. end;
  1727. procedure SaveKeyOwner();
  1728. var
  1729. k : Integer;
  1730. kppLevel : Byte;
  1731. begin
  1732. k := 0;
  1733. kppLevel := High(Byte);
  1734. while (k <= locHistoryTop) do begin
  1735. if (kppLevel <> locHistory[k].ppLevel) then begin
  1736. LastKeyOwner.Chars[k] := locHistory[k].cp;
  1737. kppLevel := locHistory[k].ppLevel;
  1738. end;
  1739. k := k + 1;
  1740. end;
  1741. if (k = 0) or (kppLevel <> ppLevel) then begin
  1742. LastKeyOwner.Chars[k] := cp;
  1743. k := k + 1;
  1744. end;
  1745. LastKeyOwner.Length := k;
  1746. end;
  1747. procedure AddWeights(AItem : PUCA_PropItemRec);inline;
  1748. begin
  1749. SaveKeyOwner();
  1750. if ((ral + AItem^.WeightLength) > rl) then
  1751. GrowKey(AItem^.WeightLength);
  1752. AItem^.GetWeightArray(@r[ral]);
  1753. ral := ral + AItem^.WeightLength;
  1754. end;
  1755. procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
  1756. begin
  1757. if ((ral + AItem^.WeightCount) > rl) then
  1758. GrowKey(AItem^.WeightCount);
  1759. Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
  1760. ral := ral + AItem^.WeightCount;
  1761. end;
  1762. procedure AddComputedWeights(ACodePoint : Cardinal);inline;
  1763. begin
  1764. SaveKeyOwner();
  1765. if ((ral + 2) > rl) then
  1766. GrowKey();
  1767. DeriveWeight(ACodePoint,@r[ral]);
  1768. ral := ral + 2;
  1769. end;
  1770. procedure RecordDeletion();inline;
  1771. begin
  1772. if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
  1773. if (suppressState.cl = nil) or
  1774. (suppressState.CharCount > ppLevel)
  1775. then begin
  1776. suppressState.cl := cl;
  1777. suppressState.CharCount := ppLevel;
  1778. end;
  1779. end;
  1780. end;
  1781. procedure RecordStep();inline;
  1782. begin
  1783. Inc(locHistoryTop);
  1784. locHistory[locHistoryTop].i := i;
  1785. locHistory[locHistoryTop].cl := cl;
  1786. locHistory[locHistoryTop].pp := pp;
  1787. locHistory[locHistoryTop].ppLevel := ppLevel;
  1788. locHistory[locHistoryTop].cp := cp;
  1789. locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
  1790. RecordDeletion();
  1791. end;
  1792. procedure ClearHistory();inline;
  1793. begin
  1794. locHistoryTop := -1;
  1795. end;
  1796. function HasHistory() : Boolean;inline;
  1797. begin
  1798. Result := (locHistoryTop >= 0);
  1799. end;
  1800. function GetHistoryLength() : Integer;inline;
  1801. begin
  1802. Result := (locHistoryTop + 1);
  1803. end;
  1804. procedure GoBack();inline;
  1805. begin
  1806. Assert(locHistoryTop >= 0);
  1807. i := locHistory[locHistoryTop].i;
  1808. cp := locHistory[locHistoryTop].cp;
  1809. cl := locHistory[locHistoryTop].cl;
  1810. pp := locHistory[locHistoryTop].pp;
  1811. ppLevel := locHistory[locHistoryTop].ppLevel;
  1812. removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
  1813. ps := @s[i];
  1814. Dec(locHistoryTop);
  1815. end;
  1816. var
  1817. c : Integer;
  1818. lastUnblockedNonstarterCCC : Byte;
  1819. function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
  1820. var
  1821. k : DWord;
  1822. pk : PUnicodeChar;
  1823. puk : PUC_Prop;
  1824. begin
  1825. k := AStartFrom;
  1826. if (k > c) then
  1827. exit(False);
  1828. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0) then
  1829. exit(False);
  1830. {if (k = (i+1)) or
  1831. ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
  1832. then
  1833. lastUnblockedNonstarterCCC := 0;}
  1834. pk := @s[k];
  1835. if UnicodeIsHighSurrogate(pk^) then begin
  1836. if (k = c) then
  1837. exit(False);
  1838. if UnicodeIsLowSurrogate(pk[1]) then
  1839. puk := GetProps(pk[0],pk[1])
  1840. else
  1841. puk := GetProps(Word(pk^));
  1842. end else begin
  1843. puk := GetProps(Word(pk^));
  1844. end;
  1845. if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
  1846. exit(False);
  1847. lastUnblockedNonstarterCCC := puk^.CCC;
  1848. Result := True;
  1849. end;
  1850. procedure RemoveChar(APos : Integer);inline;
  1851. begin
  1852. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1853. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1854. removedCharIndex[removedCharIndexLength] := APos;
  1855. Inc(removedCharIndexLength);
  1856. if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
  1857. if (removedCharIndexLength >= Length(removedCharIndex)) then
  1858. SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
  1859. removedCharIndex[removedCharIndexLength] := APos+1;
  1860. Inc(removedCharIndexLength);
  1861. end;
  1862. end;
  1863. procedure Inc_I();inline;
  1864. begin
  1865. if (removedCharIndexLength = 0) then begin
  1866. Inc(i);
  1867. Inc(ps);
  1868. exit;
  1869. end;
  1870. while True do begin
  1871. Inc(i);
  1872. Inc(ps);
  1873. if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
  1874. Break;
  1875. end;
  1876. end;
  1877. var
  1878. surrogateState : Boolean;
  1879. function MoveToNextChar() : Boolean;inline;
  1880. begin
  1881. Result := True;
  1882. if UnicodeIsHighSurrogate(ps[0]) then begin
  1883. if (i = c) then
  1884. exit(False);
  1885. if UnicodeIsLowSurrogate(ps[1]) then begin
  1886. surrogateState := True;
  1887. cp := ToUCS4(ps[0],ps[1]);
  1888. end else begin
  1889. surrogateState := False;
  1890. cp := Word(ps[0]);
  1891. end;
  1892. end else begin
  1893. surrogateState := False;
  1894. cp := Word(ps[0]);
  1895. end;
  1896. end;
  1897. procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
  1898. begin
  1899. cl := nil;
  1900. pp := nil;
  1901. ppLevel := 0;
  1902. if AClearSuppressInfo then begin
  1903. suppressState.cl := nil;
  1904. suppressState.CharCount := 0;
  1905. end;
  1906. end;
  1907. function FindPropUCA() : Boolean;
  1908. var
  1909. candidateCL : PUCA_DataBook;
  1910. begin
  1911. pp := nil;
  1912. if (cl = nil) then
  1913. candidateCL := ACollation
  1914. else
  1915. candidateCL := cl;
  1916. if surrogateState then begin
  1917. while (candidateCL <> nil) do begin
  1918. pp := GetPropUCA(ps[0],ps[1],candidateCL);
  1919. if (pp <> nil) then
  1920. break;
  1921. candidateCL := candidateCL^.Base;
  1922. end;
  1923. end else begin
  1924. while (candidateCL <> nil) do begin
  1925. pp := GetPropUCA(ps[0],candidateCL);
  1926. if (pp <> nil) then
  1927. break;
  1928. candidateCL := candidateCL^.Base;
  1929. end;
  1930. end;
  1931. cl := candidateCL;
  1932. Result := (pp <> nil);
  1933. end;
  1934. procedure AddWeightsAndClear();inline;
  1935. var
  1936. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1937. begin
  1938. if (pp^.GetWeightLength() > 0) then begin
  1939. AddWeights(pp);
  1940. end else
  1941. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1942. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1943. (ctxNode^.Data.WeightCount > 0)
  1944. then begin
  1945. AddContextWeights(@ctxNode^.Data);
  1946. end;
  1947. //AddWeights(pp);
  1948. ClearHistory();
  1949. ClearPP();
  1950. end;
  1951. procedure StartMatch();
  1952. procedure HandleLastChar();
  1953. var
  1954. ctxNode : PUCA_PropItemContextTreeNodeRec;
  1955. begin
  1956. while True do begin
  1957. if pp^.IsValid() then begin
  1958. if (pp^.GetWeightLength() > 0) then
  1959. AddWeights(pp)
  1960. else
  1961. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1962. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  1963. (ctxNode^.Data.WeightCount > 0)
  1964. then
  1965. AddContextWeights(@ctxNode^.Data)
  1966. else
  1967. AddComputedWeights(cp){handle deletion of code point};
  1968. break;
  1969. end;
  1970. if (cl^.Base = nil) then begin
  1971. AddComputedWeights(cp);
  1972. break;
  1973. end;
  1974. cl := cl^.Base;
  1975. if not FindPropUCA() then begin
  1976. AddComputedWeights(cp);
  1977. break;
  1978. end;
  1979. end;
  1980. end;
  1981. var
  1982. tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
  1983. begin
  1984. ppLevel := 0;
  1985. if not FindPropUCA() then begin
  1986. AddComputedWeights(cp);
  1987. ClearHistory();
  1988. ClearPP();
  1989. end else begin
  1990. if (i = c) then begin
  1991. HandleLastChar();
  1992. end else begin
  1993. if pp^.IsValid()then begin
  1994. if (pp^.ChildCount = 0) then begin
  1995. if (pp^.GetWeightLength() > 0) then
  1996. AddWeights(pp)
  1997. else
  1998. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  1999. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
  2000. (tmpCtxNode^.Data.WeightCount > 0)
  2001. then
  2002. AddContextWeights(@tmpCtxNode^.Data)
  2003. else
  2004. AddComputedWeights(cp){handle deletion of code point};
  2005. ClearPP();
  2006. ClearHistory();
  2007. end else begin
  2008. RecordStep();
  2009. end
  2010. end else begin
  2011. if (pp^.ChildCount = 0) then begin
  2012. AddComputedWeights(cp);
  2013. ClearPP();
  2014. ClearHistory();
  2015. end else begin
  2016. RecordStep();
  2017. end;
  2018. end ;
  2019. end;
  2020. end;
  2021. end;
  2022. function TryPermutation() : Boolean;
  2023. var
  2024. kk : Integer;
  2025. b : Boolean;
  2026. puk : PUC_Prop;
  2027. ppk : PUCA_PropItemRec;
  2028. begin
  2029. Result := False;
  2030. puk := GetProps(cp);
  2031. if (puk^.CCC = 0) then
  2032. exit;
  2033. lastUnblockedNonstarterCCC := puk^.CCC;
  2034. if surrogateState then
  2035. kk := i + 2
  2036. else
  2037. kk := i + 1;
  2038. while IsUnblockedNonstarter(kk) do begin
  2039. b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
  2040. if b then
  2041. ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
  2042. else
  2043. ppk := FindChild(Word(s[kk]),pp);
  2044. if (ppk <> nil) then begin
  2045. pp := ppk;
  2046. RemoveChar(kk);
  2047. Inc(ppLevel);
  2048. RecordStep();
  2049. Result := True;
  2050. if (pp^.ChildCount = 0 ) then
  2051. Break;
  2052. end;
  2053. if b then
  2054. Inc(kk);
  2055. Inc(kk);
  2056. end;
  2057. end;
  2058. procedure AdvanceCharPos();inline;
  2059. begin
  2060. if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
  2061. Inc(i);
  2062. Inc(ps);
  2063. end;
  2064. Inc_I();
  2065. end;
  2066. var
  2067. ok : Boolean;
  2068. pp1 : PUCA_PropItemRec;
  2069. cltemp : PUCA_DataBook;
  2070. ctxNode : PUCA_PropItemContextTreeNodeRec;
  2071. begin
  2072. if (ALength = 0) then
  2073. exit(nil);
  2074. c := ALength;
  2075. s := NormalizeNFD(AStr,c);
  2076. c := Length(s);
  2077. rl := 3*c;
  2078. SetLength(r,rl);
  2079. ral := 0;
  2080. ps := @s[1];
  2081. ClearPP();
  2082. locHistoryTop := -1;
  2083. removedCharIndexLength := 0;
  2084. FillByte(suppressState,SizeOf(suppressState),0);
  2085. LastKeyOwner.Length := 0;
  2086. i := 1;
  2087. while (i <= c) and MoveToNextChar() do begin
  2088. if (pp = nil) then begin // Start Matching
  2089. StartMatch();
  2090. end else begin
  2091. pp1 := FindChild(cp,pp);
  2092. if (pp1 <> nil) then begin
  2093. Inc(ppLevel);
  2094. pp := pp1;
  2095. if (pp^.ChildCount = 0) or (i = c) then begin
  2096. ok := False;
  2097. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2098. if (pp^.GetWeightLength() > 0) then begin
  2099. AddWeightsAndClear();
  2100. ok := True;
  2101. end else
  2102. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2103. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2104. (ctxNode^.Data.WeightCount > 0)
  2105. then begin
  2106. AddContextWeights(@ctxNode^.Data);
  2107. ClearHistory();
  2108. ClearPP();
  2109. ok := True;
  2110. end
  2111. end;
  2112. if not ok then begin
  2113. RecordDeletion();
  2114. ok := False;
  2115. while HasHistory() do begin
  2116. GoBack();
  2117. if pp^.IsValid() and
  2118. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2119. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2120. )
  2121. then begin
  2122. AddWeightsAndClear();
  2123. ok := True;
  2124. Break;
  2125. end;
  2126. end;
  2127. if not ok then begin
  2128. cltemp := cl^.Base;
  2129. if (cltemp <> nil) then begin
  2130. ClearPP(False);
  2131. cl := cltemp;
  2132. Continue;
  2133. end;
  2134. end;
  2135. if not ok then begin
  2136. AddComputedWeights(cp);
  2137. ClearHistory();
  2138. ClearPP();
  2139. end;
  2140. end;
  2141. end else begin
  2142. RecordStep();
  2143. end;
  2144. end else begin
  2145. // permutations !
  2146. ok := False;
  2147. if TryPermutation() and pp^.IsValid() then begin
  2148. if (suppressState.CharCount = 0) then begin
  2149. AddWeightsAndClear();
  2150. Continue;
  2151. end;
  2152. while True do begin
  2153. if pp^.IsValid() and
  2154. (pp^.GetWeightLength() > 0) and
  2155. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2156. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2157. )
  2158. then begin
  2159. AddWeightsAndClear();
  2160. ok := True;
  2161. break;
  2162. end;
  2163. if not HasHistory() then
  2164. break;
  2165. GoBack();
  2166. if (pp = nil) then
  2167. break;
  2168. end;
  2169. end;
  2170. if not ok then begin
  2171. if pp^.IsValid() and (suppressState.CharCount = 0) then begin
  2172. if (pp^.GetWeightLength() > 0) then begin
  2173. AddWeightsAndClear();
  2174. ok := True;
  2175. end else
  2176. if (LastKeyOwner.Length > 0) and pp^.Contextual and
  2177. pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
  2178. (ctxNode^.Data.WeightCount > 0)
  2179. then begin
  2180. AddContextWeights(@ctxNode^.Data);
  2181. ClearHistory();
  2182. ClearPP();
  2183. ok := True;
  2184. end
  2185. end;
  2186. if ok then
  2187. Continue;
  2188. end;
  2189. if not ok then begin
  2190. if (cl^.Base <> nil) then begin
  2191. cltemp := cl^.Base;
  2192. while HasHistory() do
  2193. GoBack();
  2194. pp := nil;
  2195. ppLevel := 0;
  2196. cl := cltemp;
  2197. Continue;
  2198. end;
  2199. //walk back
  2200. ok := False;
  2201. while HasHistory() do begin
  2202. GoBack();
  2203. if pp^.IsValid() and
  2204. (pp^.GetWeightLength() > 0) and
  2205. ( (suppressState.CharCount = 0) or
  2206. ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
  2207. ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
  2208. )
  2209. )
  2210. then begin
  2211. AddWeightsAndClear();
  2212. ok := True;
  2213. Break;
  2214. end;
  2215. end;
  2216. if ok then begin
  2217. AdvanceCharPos();
  2218. Continue;
  2219. end;
  2220. if (pp <> nil) then begin
  2221. AddComputedWeights(cp);
  2222. ClearHistory();
  2223. ClearPP();
  2224. end;
  2225. end;
  2226. end;
  2227. end;
  2228. if surrogateState then begin
  2229. Inc(ps);
  2230. Inc(i);
  2231. end;
  2232. //
  2233. Inc_I();
  2234. end;
  2235. SetLength(r,ral);
  2236. Result := r;
  2237. end;
  2238. function ComputeSortKey(
  2239. const AStr : PUnicodeChar;
  2240. const ALength : SizeInt;
  2241. const ACollation : PUCA_DataBook
  2242. ) : TUCASortKey;
  2243. var
  2244. r : TUCA_PropWeightsArray;
  2245. begin
  2246. r := ComputeRawSortKey(AStr,ALength,ACollation);
  2247. case ACollation^.VariableWeight of
  2248. TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(r,ACollation);
  2249. TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(r,ACollation);
  2250. TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(r,ACollation);
  2251. TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(r,ACollation);
  2252. else
  2253. Result := FormKeyShifted(r,ACollation);
  2254. end;
  2255. end;
  2256. end.