cldrhelper.pas 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336
  1. { CLDR collation helper unit.
  2. Copyright (c) 2013-2015 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. unit cldrhelper;
  17. {$mode objfpc}
  18. {$H+}
  19. {$PACKENUM 1}
  20. {$modeswitch advancedrecords}
  21. {$scopedenums on}
  22. {$typedaddress on}
  23. {$macro on}
  24. {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
  25. {$define X_PACKED:=}
  26. {$else FPC_REQUIRES_PROPER_ALIGNMENT}
  27. {$define X_PACKED:=packed}
  28. {$endif FPC_REQUIRES_PROPER_ALIGNMENT}
  29. interface
  30. uses
  31. SysUtils, Classes, helper;
  32. const
  33. COLLATION_FILE_PREFIX = 'collation_';
  34. type
  35. TUCA_LineRecArray = array of TUCA_LineRec;
  36. ECldrException = class(Exception)
  37. end;
  38. TReorderWeigthKind = (
  39. Primary, Secondary, Tertiary, Identity, Deletion
  40. );
  41. TReorderWeigthKinds = set of TReorderWeigthKind;
  42. TReorderLogicalReset = (
  43. None,// FirstVariable, LastVariable,
  44. FirstTertiaryIgnorable, LastTertiaryIgnorable,
  45. FirstSecondaryIgnorable, LastSecondaryIgnorable,
  46. FirstPrimaryIgnorable, LastPrimaryIgnorable,
  47. LastRegular,
  48. FirstNonIgnorable, LastNonIgnorable,
  49. FirstTrailing, LastTrailing
  50. );
  51. const
  52. FixableReorderLogicalSet = [
  53. TReorderLogicalReset.LastRegular,TReorderLogicalReset.FirstTrailing,
  54. TReorderLogicalReset.LastTrailing
  55. ];
  56. type
  57. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  58. TCollationFields = set of TCollationField;
  59. PReorderUnit = ^TReorderUnit;
  60. { TReorderUnit }
  61. TReorderUnit = X_PACKED record
  62. private
  63. FVirtualPosition : TReorderLogicalReset;
  64. public
  65. Context : TUnicodeCodePointArray;
  66. ExpansionChars : TUnicodeCodePointArray;
  67. Characters : TUnicodeCodePointArray;
  68. WeigthKind : TReorderWeigthKind;
  69. InitialPosition : Integer;
  70. Changed : Boolean;
  71. public
  72. property VirtualPosition : TReorderLogicalReset read FVirtualPosition;
  73. function IsVirtual() : Boolean;inline;
  74. public
  75. class function From(
  76. const AChars,
  77. AContext : array of TUnicodeCodePoint;
  78. const AWeigthKind : TReorderWeigthKind;
  79. const AInitialPosition : Integer
  80. ) : TReorderUnit;static;overload;
  81. class function From(
  82. const AChars : array of TUnicodeCodePoint;
  83. const AWeigthKind : TReorderWeigthKind;
  84. const AInitialPosition : Integer
  85. ) : TReorderUnit;static;overload;
  86. class function From(
  87. const AChar : TUnicodeCodePoint;
  88. const AWeigthKind : TReorderWeigthKind;
  89. const AInitialPosition : Integer
  90. ) : TReorderUnit;static;overload;
  91. class function From(
  92. const AChar : TUnicodeCodePoint;
  93. const AContext : array of TUnicodeCodePoint;
  94. const AWeigthKind : TReorderWeigthKind;
  95. const AInitialPosition : Integer
  96. ) : TReorderUnit;static;overload;
  97. class function From(
  98. const AReset : TReorderLogicalReset
  99. ) : TReorderUnit;static;overload;
  100. procedure SetExpansion(const AChars : array of TUnicodeCodePoint);
  101. procedure SetExpansion(const AChar : TUnicodeCodePoint);
  102. procedure Clear();
  103. procedure Assign(const AItem : PReorderUnit);
  104. function HasContext() : Boolean;
  105. function IsExpansion() : Boolean;
  106. end;
  107. PReorderSequence = ^TReorderSequence;
  108. { TReorderSequence }
  109. TReorderSequence = X_PACKED record
  110. public
  111. Reset : array of TUnicodeCodePoint;
  112. Elements : array of TReorderUnit;
  113. LogicalPosition : TReorderLogicalReset;
  114. Before : Boolean;
  115. public
  116. procedure Clear();
  117. procedure SetElementCount(const ALength : Integer);
  118. procedure Assign(ASource : PReorderSequence);
  119. end;
  120. TReorderSequenceArray = array of TReorderSequence;
  121. { TOrderedCharacters }
  122. TOrderedCharacters = record
  123. private
  124. FActualLength : Integer;
  125. private
  126. procedure EnsureSize(const AMinSize : Integer);
  127. public
  128. Data : array of TReorderUnit;
  129. property ActualLength : Integer read FActualLength;
  130. public
  131. class function Create(const ACapacity : Integer) : TOrderedCharacters;static;overload;
  132. class function Create() : TOrderedCharacters;static;overload;
  133. procedure Clear();
  134. function Clone() : TOrderedCharacters;
  135. function Insert(const AItem : TReorderUnit; const ADestPos : Integer) : Integer;
  136. function Append(const AItem : TReorderUnit) : Integer;
  137. procedure Delete(const AIndex : Integer);
  138. procedure ApplyStatement(const AStatement : PReorderSequence);
  139. end;
  140. POrderedCharacters = ^TOrderedCharacters;
  141. { TCldrImport }
  142. TCldrImport = class
  143. private
  144. FSource: string;
  145. FTypeName: string;
  146. public
  147. property Source : string read FSource;
  148. property TypeName : string read FTypeName;
  149. end;
  150. { TCldrImportList }
  151. TCldrImportList = class
  152. private
  153. FItems : array of TCldrImport;
  154. private
  155. function GetCount: Integer;
  156. function GetItem(AIndex : Integer): TCldrImport;
  157. public
  158. destructor Destroy();override;
  159. procedure Clear();
  160. function IndexOf(const ASource, AType : string) : Integer;
  161. function Find(const ASource, AType : string) : TCldrImport;
  162. function Add(const ASource, AType : string) : TCldrImport;
  163. property Count : Integer read GetCount;
  164. property Item[AIndex : Integer] : TCldrImport read GetItem;default;
  165. end;
  166. TCldrCollation = class;
  167. { TCldrCollationItem }
  168. TCldrCollationItem = class
  169. private
  170. FAlt: string;
  171. FBackwards: Boolean;
  172. FBase: string;
  173. FChangedFields: TCollationFields;
  174. FImports: TCldrImportList;
  175. FParent: TCldrCollation;
  176. FRules: TReorderSequenceArray;
  177. FTypeName: string;
  178. public
  179. constructor Create();
  180. destructor Destroy;override;
  181. procedure Clear();
  182. function IsPrivate() : Boolean;
  183. property Parent : TCldrCollation read FParent;
  184. property TypeName : string read FTypeName write FTypeName;
  185. property Alt : string read FAlt write FAlt;
  186. property Base : string read FBase write FBase;
  187. property Backwards : Boolean read FBackwards write FBackwards;
  188. property Rules : TReorderSequenceArray read FRules write FRules;
  189. property ChangedFields : TCollationFields read FChangedFields write FChangedFields;
  190. property Imports : TCldrImportList read FImports;
  191. end;
  192. TCldrParserMode = (HeaderParsing, FullParsing);
  193. TCldrCollationRepository = class;
  194. { TCldrCollation }
  195. TCldrCollation = class
  196. private
  197. FItems : array of TCldrCollationItem;
  198. FLocalID: string;
  199. FDefaultType: string;
  200. FVersion: string;
  201. FLanguage: string;
  202. FMode: TCldrParserMode;
  203. FRepository: TCldrCollationRepository;
  204. private
  205. function GetItem(Index : Integer): TCldrCollationItem;
  206. function GetItemCount: Integer;
  207. public
  208. destructor Destroy();override;
  209. procedure Clear();
  210. function IndexOf(const AItemName : string) : Integer;overload;
  211. function IndexOf(const AItemName, AItemAlt : string) : Integer;overload;
  212. function Find(const AItemName : string) : TCldrCollationItem;overload;
  213. function Find(const AItemName, AItemAlt : string) : TCldrCollationItem;overload;
  214. function Add(AItem : TCldrCollationItem) : Integer;
  215. function FindPublicItemCount() : Integer;
  216. property Language : string read FLanguage write FLanguage;
  217. property LocalID : string read FLocalID write FLocalID;
  218. property Version : string read FVersion write FVersion;
  219. property DefaultType : string read FDefaultType write FDefaultType;
  220. property ItemCount : Integer read GetItemCount;
  221. property Items[Index : Integer] : TCldrCollationItem read GetItem;
  222. property Mode : TCldrParserMode read FMode write FMode;
  223. property Repository : TCldrCollationRepository read FRepository;
  224. end;
  225. ICldrCollationLoader = interface
  226. ['{117AAC84-06CE-4EC8-9B07-4E81EC23930C}']
  227. procedure LoadCollation(
  228. const ALanguage : string;
  229. ACollation : TCldrCollation;
  230. AMode : TCldrParserMode
  231. );
  232. procedure LoadCollationType(
  233. const ALanguage,
  234. ATypeName : string;
  235. AType : TCldrCollationItem
  236. );
  237. end;
  238. { TCldrCollationRepository }
  239. TCldrCollationRepository = class
  240. private
  241. FItems : array of TCldrCollation;
  242. FLoader: ICldrCollationLoader;
  243. private
  244. function GetItem(const AIndex : Integer): TCldrCollation;
  245. function GetItemCount: Integer;
  246. function IndexOfItem(AItem : TCldrCollation) : Integer;
  247. procedure Add(AItem : TCldrCollation);
  248. public
  249. constructor Create(ALoader : ICldrCollationLoader);
  250. destructor Destroy;override;
  251. procedure FreeItems();
  252. procedure Clear();
  253. procedure SetLoader(AValue : ICldrCollationLoader);
  254. function IndexOf(const ALanguage : string) : Integer;
  255. function Find(const ALanguage : string) : TCldrCollation;
  256. function Load(const ALanguage : string; const AMode : TCldrParserMode) : TCldrCollation;
  257. property ItemCount : Integer read GetItemCount;
  258. property Items[const AIndex : Integer] : TCldrCollation read GetItem;
  259. property Loader : ICldrCollationLoader read FLoader;
  260. end;
  261. TRuleVisiterFunction =
  262. function(
  263. ARule : PReorderSequence;
  264. AOwner : TCldrCollationItem;
  265. AData : Pointer
  266. ) : Boolean;
  267. function ForEachRule(
  268. ACollationType : TCldrCollationItem;
  269. AVisitFunc : TRuleVisiterFunction;
  270. ACustomData : Pointer
  271. ) : Boolean;
  272. function ComputeWeigths(
  273. const AData : PReorderUnit;
  274. const ADataLen : Integer;
  275. const ADataWeigths : TUCA_LineRecArray;
  276. out AResult : TUCA_LineRecArray
  277. ) : Integer;
  278. function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
  279. procedure GenerateCdlrCollation(
  280. ACollation : TCldrCollation;
  281. AItemName : string;
  282. AStoreName : string;
  283. AStream,
  284. ANativeEndianStream,
  285. AOtherEndianStream,
  286. ABinaryNativeEndianStream,
  287. ABinaryOtherEndianStream : TStream;
  288. ARootChars : TOrderedCharacters;
  289. ARootWeigths : TUCA_LineRecArray
  290. );
  291. procedure GenerateUCA_CLDR_Head(
  292. ADest : TStream;
  293. ABook : PUCA_DataBook;
  294. AProps : PUCA_PropBook;
  295. ACollation : TCldrCollationItem
  296. );
  297. function FillInitialPositions(
  298. AData : PReorderUnit;
  299. const ADataLen : Integer;
  300. const ADataWeigths : TUCA_LineRecArray
  301. ) : Integer;
  302. function IndexOf(
  303. const APattern : array of TUnicodeCodePoint;
  304. const APatternContext : array of TUnicodeCodePoint;
  305. const ASequence : PReorderUnit;
  306. const ASequenceLength : Integer
  307. ) : Integer;
  308. function TryStrToLogicalReorder(
  309. const AValue : string;
  310. out AResult : TReorderLogicalReset
  311. ) : Boolean;
  312. resourcestring
  313. sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
  314. sCodePointExpected = 'Code Point node expected as child at this position "%d".';
  315. sCollationsExistsAlready = 'This collation already exists : "%s"';
  316. sCollationsNodeNotFound = '"collations" node not found.';
  317. sCollationTypeNotFound = 'collation "Type" not found : "%s".';
  318. sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
  319. sInvalidResetClause = 'Invalid "Reset" clause.';
  320. sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
  321. sRulesNodeNotFound = '"rules" node not found.';
  322. sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".';
  323. sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".';
  324. sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".';
  325. sVirtualIsReadOnly = 'Virtual logical "Reset" items are read only.';
  326. implementation
  327. uses
  328. RtlConsts, typinfo;
  329. function TryStrToLogicalReorder(
  330. const AValue : string;
  331. out AResult : TReorderLogicalReset
  332. ) : Boolean;
  333. var
  334. s : string;
  335. i : Integer;
  336. begin
  337. s := StringReplace(AValue,' ','',[rfReplaceAll]);
  338. s := StringReplace(s,'_','',[rfReplaceAll]);
  339. i := GetEnumValue(TypeInfo(TReorderLogicalReset),s);
  340. Result := (i > -1);
  341. if Result then
  342. AResult := TReorderLogicalReset(i);
  343. end;
  344. function ToStr(const ACharacters : array of TUnicodeCodePoint): string;
  345. var
  346. i : Integer;
  347. begin
  348. Result := '';
  349. for i := Low(ACharacters) to High(ACharacters) do begin
  350. if (ACharacters[i] > $FFFF) then
  351. Result := Result + ' ' + IntToHex(ACharacters[i],5)
  352. else
  353. Result := Result + ' ' + IntToHex(ACharacters[i],4);
  354. end;
  355. Result := Trim(Result);
  356. end;
  357. function IndexOf(
  358. const APattern : array of TUnicodeCodePoint;
  359. const APatternContext : array of TUnicodeCodePoint;
  360. const ASequence : PReorderUnit;
  361. const ASequenceLength : Integer
  362. ) : Integer;
  363. var
  364. i, lp, sizep, lengthContext, sizeContext : Integer;
  365. p : PReorderUnit;
  366. begin
  367. Result := -1;
  368. if (ASequenceLength = 0) then
  369. exit;
  370. lp := Length(APattern);
  371. if (lp = 0) then
  372. exit;
  373. sizep := lp*SizeOf(TUnicodeCodePoint);
  374. lengthContext := Length(APatternContext);
  375. sizeContext := lengthContext*SizeOf(TUnicodeCodePoint);
  376. p := ASequence;
  377. for i := 0 to ASequenceLength - 1 do begin
  378. if (Length(p^.Characters) = lp) then begin
  379. if CompareMem(@APattern[0],@p^.Characters[0],sizep) then begin
  380. if (Length(p^.Context) = lengthContext) and
  381. ( (lengthContext = 0) or
  382. CompareMem(@p^.Context[0],@APatternContext[0],sizeContext)
  383. )
  384. then begin
  385. Result := i;
  386. Break;
  387. end;
  388. end;
  389. end;
  390. Inc(p);
  391. end;
  392. end;
  393. {procedure ApplyStatementToSequence(
  394. var ASequence : TOrderedCharacters;
  395. const AStatement : PReorderSequence;
  396. const AStatementCount : Integer
  397. );
  398. var
  399. pse, pd : PReorderUnit;
  400. kr : Integer;
  401. function GetNextInsertPos() : Integer;
  402. var
  403. kk : Integer;
  404. begin
  405. if (pse^.WeigthKind = rwkDeletion) then
  406. exit(0);
  407. if (pse^.WeigthKind = rwkIdentity) then
  408. exit(kr + 1);
  409. kk := kr + 1;
  410. pd := @ASequence.Data[kk];
  411. for kk := kk to ASequence.ActualLength - 1 do begin
  412. if (pd^.WeigthKind <= pse^.WeigthKind) then
  413. exit(kk);
  414. Inc(pd);
  415. end;
  416. Result := ASequence.ActualLength;
  417. end;
  418. var
  419. locResetPos, i, k, h : Integer;
  420. pst : PReorderSequence;
  421. begin
  422. pst := AStatement;
  423. for h := 0 to AStatementCount - 1 do begin
  424. locResetPos := -1;
  425. if (Length(pst^.Reset) > 0) then begin
  426. locResetPos := IndexOf(pst^.Reset,[],@ASequence.Data[0],ASequence.ActualLength);
  427. if (locResetPos = -1) then
  428. raise ECldrException.CreateFmt('Character(s) not found in sequence : "%s".',[ToStr(pst^.Reset)]);
  429. end;
  430. pse := @pst^.Elements[0];
  431. kr := locResetPos;
  432. k := GetNextInsertPos();
  433. for i := Low(pst^.Elements) to High(pst^.Elements) do begin
  434. k := ASequence.Insert(pse^,k)+1;
  435. Inc(pse);
  436. end;
  437. Inc(pst);
  438. end;
  439. end;}
  440. function FindLogicalPos(
  441. const ASequence : POrderedCharacters;
  442. const APosition : TReorderLogicalReset
  443. ) : Integer;
  444. var
  445. i, c : Integer;
  446. p : PReorderUnit;
  447. firstPos, lastPos : Integer;
  448. begin
  449. Result := 0;
  450. if (ASequence^.ActualLength = 0) then
  451. exit;
  452. p := @ASequence^.Data[0];
  453. c := ASequence^.ActualLength;
  454. if (APosition in [TReorderLogicalReset.FirstTertiaryIgnorable, TReorderLogicalReset.LastTertiaryIgnorable])
  455. then begin
  456. firstPos := -1;
  457. for i := 0 to c - 1 do begin
  458. if (p^.WeigthKind <= TReorderWeigthKind.Tertiary) then begin
  459. firstPos := i;
  460. Break;
  461. end;
  462. Inc(p);
  463. end;
  464. if (firstPos = -1) then
  465. exit(0);
  466. if (APosition = TReorderLogicalReset.FirstTertiaryIgnorable) then
  467. exit(firstPos);
  468. if (p^.WeigthKind < TReorderWeigthKind.Tertiary) then
  469. exit(firstPos);
  470. lastPos := -1;
  471. for i := firstPos + 1 to c - 1 do begin
  472. if (p^.WeigthKind <> TReorderWeigthKind.Identity) then begin
  473. lastPos := i;
  474. Break;
  475. end;
  476. Inc(p);
  477. end;
  478. if (lastPos = -1) then
  479. exit(c);
  480. exit(lastPos);
  481. end;
  482. if (APosition in [TReorderLogicalReset.FirstSecondaryIgnorable, TReorderLogicalReset.LastSecondaryIgnorable])
  483. then begin
  484. firstPos := -1;
  485. for i := 0 to c - 1 do begin
  486. if (p^.WeigthKind <= TReorderWeigthKind.Secondary) then begin
  487. firstPos := i;
  488. Break;
  489. end;
  490. Inc(p);
  491. end;
  492. if (firstPos = -1) then
  493. exit(0);
  494. if (APosition = TReorderLogicalReset.FirstSecondaryIgnorable) then
  495. exit(firstPos);
  496. if (p^.WeigthKind < TReorderWeigthKind.Secondary) then
  497. exit(firstPos);
  498. lastPos := -1;
  499. for i := firstPos + 1 to c - 1 do begin
  500. if (p^.WeigthKind <> TReorderWeigthKind.Identity) then begin
  501. lastPos := i;
  502. Break;
  503. end;
  504. Inc(p);
  505. end;
  506. if (lastPos = -1) then
  507. exit(c);
  508. exit(lastPos);
  509. end;
  510. if (APosition in [TReorderLogicalReset.FirstPrimaryIgnorable, TReorderLogicalReset.LastPrimaryIgnorable])
  511. then begin
  512. firstPos := -1;
  513. for i := 0 to c - 1 do begin
  514. if (p^.WeigthKind <= TReorderWeigthKind.Primary) then begin
  515. firstPos := i;
  516. Break;
  517. end;
  518. Inc(p);
  519. end;
  520. if (firstPos = -1) then
  521. exit(0);
  522. if (APosition = TReorderLogicalReset.FirstPrimaryIgnorable) then
  523. exit(firstPos);
  524. if (p^.WeigthKind < TReorderWeigthKind.Primary) then
  525. exit(firstPos);
  526. lastPos := -1;
  527. for i := firstPos + 1 to c - 1 do begin
  528. if (p^.WeigthKind <> TReorderWeigthKind.Identity) then begin
  529. lastPos := i;
  530. Break;
  531. end;
  532. Inc(p);
  533. end;
  534. if (lastPos = -1) then
  535. exit(c);
  536. exit(lastPos);
  537. end;
  538. if (APosition = TReorderLogicalReset.FirstNonIgnorable) then begin
  539. firstPos := -1;
  540. for i := 0 to c - 1 do begin
  541. if (p^.WeigthKind <= TReorderWeigthKind.Primary) then begin
  542. firstPos := i;
  543. Break;
  544. end;
  545. Inc(p);
  546. end;
  547. if (firstPos = -1) then
  548. exit(0);
  549. exit(firstPos);
  550. end;
  551. if (APosition = TReorderLogicalReset.LastNonIgnorable) then
  552. exit(c);
  553. for i := 0 to c - 1 do begin
  554. if (p^.VirtualPosition = APosition) then
  555. exit(i);
  556. Inc(p);
  557. end;
  558. end;
  559. procedure ApplyStatementToSequence(
  560. var ASequence : TOrderedCharacters;
  561. const AStatement : PReorderSequence;
  562. const AStatementCount : Integer
  563. );
  564. var
  565. pse, pd : PReorderUnit;
  566. kr : Integer;
  567. pst : PReorderSequence;
  568. function GetNextInsertPos() : Integer;
  569. var
  570. kk : Integer;
  571. begin
  572. if (pse^.WeigthKind = TReorderWeigthKind.Deletion) then
  573. exit(0);
  574. if (pse^.WeigthKind = TReorderWeigthKind.Identity) then
  575. exit(kr + 1);
  576. if not pst^.Before then begin
  577. kk := kr + 1;
  578. if (kk >= ASequence.ActualLength) then
  579. exit(kk);
  580. pd := @ASequence.Data[kk];
  581. for kk := kk to ASequence.ActualLength - 1 do begin
  582. if (pd^.WeigthKind <= pse^.WeigthKind) then
  583. exit(kk);
  584. Inc(pd);
  585. end;
  586. Result := ASequence.ActualLength;
  587. end else begin
  588. if (kr = 0) then
  589. exit(0);
  590. pd := @ASequence.Data[kr];
  591. if pd^.IsVirtual() and (pd^.VirtualPosition in FixableReorderLogicalSet) then begin
  592. kr := kr-1;
  593. if (kr = 0) then
  594. exit;
  595. end;
  596. kk := kr;
  597. pd := @ASequence.Data[kk];
  598. if (pd^.WeigthKind = TReorderWeigthKind.Primary) then begin
  599. pd^.WeigthKind := pse^.WeigthKind;
  600. pse^.WeigthKind := TReorderWeigthKind.Primary;
  601. exit(kk);
  602. end;
  603. for kk := kk downto 0 do begin
  604. if (pd^.WeigthKind = TReorderWeigthKind.Deletion) or (pd^.WeigthKind <= pse^.WeigthKind) then begin
  605. if (pd^.WeigthKind > pse^.WeigthKind) then
  606. pd^.WeigthKind := pse^.WeigthKind;
  607. exit(kk);
  608. end;
  609. Dec(pd);
  610. end;
  611. Result := 0;
  612. end;
  613. end;
  614. var
  615. locResetPos, i, k, h : Integer;
  616. begin
  617. if (Length(AStatement^.Elements) = 0) then
  618. exit;
  619. pst := AStatement;
  620. for h := 0 to AStatementCount - 1 do begin
  621. locResetPos := -1;
  622. if (pst^.LogicalPosition > TReorderLogicalReset.None) then
  623. locResetPos := FindLogicalPos(@ASequence,pst^.LogicalPosition)
  624. else if (Length(pst^.Reset) > 0) then begin
  625. locResetPos := IndexOf(pst^.Reset,[],@ASequence.Data[0],ASequence.ActualLength);
  626. {if (locResetPos = -1) then
  627. raise ECldrException.CreateFmt('Character(s) not found in sequence : "%s".',[ToStr(pst^.Reset)]);}
  628. if (locResetPos = -1) then
  629. locResetPos := ASequence.ActualLength;
  630. end;
  631. if (pst^.LogicalPosition in FixableReorderLogicalSet) then begin
  632. if (locResetPos < 0) or
  633. (locResetPos >= ASequence.ActualLength) or
  634. not(ASequence.Data[locResetPos].VirtualPosition in FixableReorderLogicalSet)
  635. then begin
  636. locResetPos := ASequence.Append(TReorderUnit.From(pst^.LogicalPosition));
  637. end;
  638. end;
  639. pse := @pst^.Elements[0];
  640. kr := locResetPos;
  641. k := GetNextInsertPos();
  642. for i := Low(pst^.Elements) to High(pst^.Elements) do begin
  643. k := ASequence.Insert(pse^,k)+1;
  644. Inc(pse);
  645. end;
  646. Inc(pst);
  647. end;
  648. end;
  649. type
  650. PUCA_WeightRecArray = ^TUCA_WeightRecArray;
  651. TUCASortKey = array of Word;
  652. function SimpleFormKey(const ACEList : TUCA_WeightRecArray) : TUCASortKey;
  653. var
  654. r : TUCASortKey;
  655. i, c, k, ral, levelCount : Integer;
  656. pce : ^TUCA_WeightRec;
  657. begin
  658. c := Length(ACEList);
  659. if (c = 0) then
  660. exit(nil);
  661. //SetLength(r,((3+1{Level Separator})*c));
  662. levelCount := Length(ACEList[0].Weights);
  663. if (levelCount > 3) then
  664. levelCount := 3;
  665. SetLength(r,(levelCount*c + levelCount));
  666. ral := 0;
  667. for i := 0 to levelCount - 1 do begin
  668. for k := 0 to c - 1 do begin
  669. pce := @ACEList[k];
  670. if (pce^.Weights[i] <> 0) then begin
  671. r[ral] := pce^.Weights[i];
  672. ral := ral + 1;
  673. end;
  674. //pce := pce + 1;
  675. end;
  676. r[ral] := 0;
  677. ral := ral + 1;
  678. end;
  679. ral := ral - 1;
  680. SetLength(r,ral);
  681. Result := r;
  682. end;
  683. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  684. var
  685. i, hb : Integer;
  686. begin
  687. if (Pointer(A) = Pointer(B)) then
  688. exit(0);
  689. Result := 1;
  690. hb := Length(B) - 1;
  691. for i := 0 to Length(A) - 1 do begin
  692. if (i > hb) then
  693. exit;
  694. if (A[i] < B[i]) then
  695. exit(-1);
  696. if (A[i] > B[i]) then
  697. exit(1);
  698. end;
  699. if (Length(A) = Length(B)) then
  700. exit(0);
  701. exit(-1);
  702. end;
  703. {function ComputeWeigths(
  704. const AData : PReorderUnit;
  705. const ADataLen : Integer;
  706. const ADataWeigths : TUCA_LineRecArray;
  707. out AResult : TUCA_LineRecArray
  708. ) : Integer;
  709. function GetWeigth(AItem : PReorderUnit) : PUCA_WeightRecArray;
  710. begin
  711. Result := nil;
  712. if (AItem^.InitialPosition < 1) or (AItem^.InitialPosition > Length(ADataWeigths)) then
  713. raise ECldrException.CreateFmt('Invalid "InitialPosition" value : %d.',[AItem^.InitialPosition]);
  714. Result := @ADataWeigths[(AItem^.InitialPosition-1)].Weights;
  715. end;
  716. var
  717. c, i, ral : Integer;
  718. p, q : PReorderUnit;
  719. r : TUCA_LineRecArray;
  720. pr : PUCA_LineRec;
  721. pbase : PReorderUnit;
  722. pw, pwb : PUCA_WeightRecArray;
  723. cw, ki : Integer;
  724. begin
  725. Result := 0;
  726. if (ADataLen < 1) then
  727. exit;
  728. c := ADataLen;
  729. ral := 0;
  730. SetLength(r,c);
  731. FillByte(r[0],(Length(r)*SizeOf(r[0])),0);
  732. q := nil;
  733. pbase := nil;
  734. p := AData+1;
  735. pr := @r[0];
  736. i := 1;
  737. while (i < c) do begin
  738. if p^.Changed then begin
  739. if (pbase = nil) then begin
  740. pbase := p - 1;
  741. pwb := GetWeigth(pbase);
  742. end;
  743. if (p^.WeigthKind = rwkIdentity) then begin
  744. pr^.CodePoints := Copy(p^.Characters);
  745. q := p - 1;
  746. if (q = pbase) then
  747. pw := pwb
  748. else
  749. pw := @((pr-1)^.Weights);
  750. pr^.Weights := Copy(pw^);
  751. Inc(pr);
  752. Inc(ral);
  753. end else begin
  754. pr^.CodePoints := Copy(p^.Characters);
  755. q := p - 1;
  756. if (q = pbase) then begin
  757. pw := pwb;
  758. cw := (Length(pw^)+1);
  759. SetLength(pr^.Weights,cw);
  760. Move(pw^[0],pr^.Weights[0],((cw-1)*SizeOf(pw^[0])));
  761. FillByte(pr^.Weights[(cw-1)],SizeOf(pr^.Weights[0]),0);
  762. ki := Ord(p^.WeigthKind);
  763. pr^.Weights[(cw-1)].Weights[ki] := pr^.Weights[(cw-2)].Weights[ki]+1;
  764. end else begin
  765. pw := @((pr-1)^.Weights);
  766. pr^.Weights := Copy(pw^);
  767. cw := Length(pr^.Weights);
  768. ki := Ord(p^.WeigthKind);
  769. for ki := Ord(rwkPrimary) to Ord(rwkTertiary) do begin
  770. if (ki < Ord(p^.WeigthKind)) then
  771. pr^.Weights[(cw-1)].Weights[ki] := pw^[(cw-1)].Weights[ki]
  772. else if (ki = Ord(p^.WeigthKind)) then begin
  773. if (pw^[(cw-1)].Weights[ki] = 0) then
  774. pr^.Weights[(cw-1)].Weights[ki] := pwb^[(Length(pwb^)-1)].Weights[ki]+1
  775. else
  776. pr^.Weights[(cw-1)].Weights[ki] := pw^[(cw-1)].Weights[ki]+1;
  777. end else begin
  778. pr^.Weights[(cw-1)].Weights[ki] := 0;
  779. end;
  780. end;
  781. end;
  782. Inc(pr);
  783. Inc(ral);
  784. end;
  785. end else begin
  786. pbase := nil;
  787. pwb := nil;
  788. end;
  789. Inc(p);
  790. Inc(i);
  791. end;
  792. SetLength(r,ral);
  793. AResult := r;
  794. Result := Length(AResult);
  795. end;}
  796. function IndexOf(
  797. const APattern : array of TUnicodeCodePoint;
  798. const AList : PUCA_LineRec;
  799. const AListLen : Integer
  800. ) : Integer;
  801. var
  802. i, lengthPattern, sizePattern : Integer;
  803. pl : PUCA_LineRec;
  804. begin
  805. Result := -1;
  806. if (Length(APattern) = 0) then
  807. exit;
  808. if (AListLen = 0) then
  809. exit;
  810. lengthPattern := Length(APattern);
  811. sizePattern := lengthPattern*SizeOf(TUnicodeCodePoint);
  812. pl := AList;
  813. for i := 0 to AListLen - 1 do begin
  814. if (Length(pl^.CodePoints) = lengthPattern) and
  815. CompareMem(@pl^.CodePoints[0],@APattern[0],sizePattern)
  816. then begin
  817. Result := i;
  818. Break;
  819. end;
  820. Inc(pl);
  821. end;
  822. end;
  823. function IsIgnorable(AWeight : TUCA_WeightRecArray) : Boolean;
  824. var
  825. i : Integer;
  826. begin
  827. if (Length(AWeight) = 0) then
  828. exit(True);
  829. for i := Low(AWeight) to High(AWeight) do begin
  830. if (AWeight[i].Weights[0] <> 0) or
  831. (AWeight[i].Weights[1] <> 0) or
  832. (AWeight[i].Weights[2] <> 0)
  833. then begin
  834. exit(False);
  835. end;
  836. end;
  837. Result := True;
  838. end;
  839. function RemoveIgnorables(
  840. AItem : TUnicodeCodePointArray;
  841. const AList : PUCA_LineRec;
  842. const AListLen : Integer
  843. ) : TUnicodeCodePointArray;
  844. var
  845. i, c, k : Integer;
  846. begin
  847. SetLength(Result,Length(AItem));
  848. c := 0;
  849. for i := 0 to Length(AItem) - 1 do begin
  850. k := IndexOf([AItem[i]],AList,AListLen);
  851. if (k >= 0) and
  852. IsIgnorable(AList[k].Weights)
  853. then
  854. k := -1;
  855. if (k >= 0) then begin
  856. Result[c] := AItem[i];
  857. c := c+1;
  858. end;
  859. end;
  860. SetLength(Result,c);
  861. end;
  862. function Compress(
  863. const AData : TUCA_LineRecArray;
  864. out AResult : TUCA_LineRecArray
  865. ) : Boolean;
  866. var
  867. r : TUCA_LineRecArray;
  868. pr, p : PUCA_LineRec;
  869. ral : Integer;
  870. function FindOutSlot() : Boolean;
  871. var
  872. k : Integer;
  873. begin
  874. k := IndexOf(p^.CodePoints,@r[0],ral);
  875. Result := (k >= 0);
  876. if (k = -1) then begin
  877. k := ral;
  878. ral := ral + 1;
  879. end;
  880. pr := @r[k];
  881. end;
  882. procedure AddContextData();
  883. var
  884. k : Integer;
  885. begin
  886. if not p^.HasContext() then
  887. exit;
  888. k := Length(pr^.Context.Data);
  889. SetLength(pr^.Context.Data,(k+1));
  890. pr^.Context.Data[k].CodePoints := Copy(p^.Context.Data[0].CodePoints);
  891. pr^.Context.Data[k].Weights := Copy(p^.Weights);
  892. end;
  893. procedure AddItem();
  894. begin
  895. pr^.Assign(p^);
  896. if p^.HasContext() then begin
  897. SetLength(pr^.Context.Data,0);
  898. pr^.Weights := nil;
  899. AddContextData();
  900. end;
  901. end;
  902. var
  903. c, i : Integer;
  904. begin
  905. c := Length(AData);
  906. if (c = 0) then
  907. exit;
  908. SetLength(r,c);
  909. FillByte(r[0],(Length(r)*SizeOf(r[0])),0);
  910. pr := @r[0];
  911. p := @AData[0];
  912. ral := 0;
  913. i := 0;
  914. AddItem();
  915. ral := 1;
  916. i := 1;
  917. Inc(p);
  918. while (i < c) do begin
  919. if FindOutSlot() then
  920. AddContextData()
  921. else
  922. AddItem();
  923. Inc(p);
  924. Inc(i);
  925. end;
  926. SetLength(r,ral);
  927. AResult := r;
  928. Result := (ral < Length(AData));
  929. end;
  930. function MarkSuffixAsChanged(
  931. const AData : PReorderUnit;
  932. const ADataLen : Integer
  933. ) : Integer;
  934. var
  935. i, k : Integer;
  936. p, q : PReorderUnit;
  937. suffixChar : TUnicodeCodePoint;
  938. begin
  939. Result := 0;
  940. if (ADataLen <= 1) then
  941. exit;
  942. q := AData;
  943. p := AData;
  944. for i := 0 to ADataLen - 1 do begin
  945. if not(p^.IsVirtual()) and p^.Changed then begin
  946. suffixChar := p^.Characters[0];
  947. for k := 0 to ADataLen - 1 do begin
  948. if not(q[k].Changed) and (q[k].Characters[0] = suffixChar) then begin
  949. q[k].Changed := True;
  950. Result := Result + 1;
  951. end;
  952. end;
  953. end;
  954. Inc(p);
  955. end;
  956. end;
  957. {$include weight_derivation.inc}
  958. function InternalComputeWeigths(
  959. const AData : PReorderUnit;
  960. const ADataLen : Integer;
  961. const ADataWeigths : TUCA_LineRecArray;
  962. out AResult : TUCA_LineRecArray
  963. ) : Integer;
  964. function GetWeigth(AItem : PReorderUnit) : PUCA_WeightRecArray;
  965. begin
  966. Result := nil;
  967. if (AItem^.InitialPosition < 1) or (AItem^.InitialPosition > Length(ADataWeigths)) then
  968. raise ECldrException.CreateFmt('Invalid "InitialPosition" value : %d.',[AItem^.InitialPosition]);
  969. Result := @ADataWeigths[(AItem^.InitialPosition-1)].Weights;
  970. end;
  971. var
  972. r : TUCA_LineRecArray;
  973. pr : PUCA_LineRec;
  974. procedure AddContext(const ACodePointPattern : TUnicodeCodePointArray);
  975. var
  976. k : Integer;
  977. begin
  978. k := Length(pr^.Context.Data);
  979. SetLength(pr^.Context.Data,(k+1));
  980. pr^.Context.Data[k].CodePoints := Copy(ACodePointPattern);
  981. SetLength(pr^.Context.Data[k].Weights,0);
  982. end;
  983. var
  984. ral : Integer;
  985. i : Integer;
  986. p : PReorderUnit;
  987. pbase : PReorderUnit;
  988. pwb : PUCA_WeightRecArray;
  989. actualBegin : Boolean;
  990. loopIndex : Integer;
  991. procedure SkipDeletion();
  992. begin
  993. pr^.CodePoints := Copy(p^.Characters);
  994. pr^.Deleted := True;
  995. SetLength(pr^.Weights,0);
  996. if p^.HasContext() then
  997. AddContext(p^.Context);
  998. Inc(pr);
  999. Inc(ral);
  1000. Inc(p);
  1001. Inc(i);
  1002. end;
  1003. procedure FindBaseItem();
  1004. begin
  1005. if (pbase = nil) or (pwb^ = nil) then begin
  1006. if actualBegin then begin
  1007. pwb := @ADataWeigths[0].Weights;
  1008. end else begin
  1009. pbase := p - 1;
  1010. if pbase^.Changed then
  1011. pwb := @((pr-1)^.Weights)
  1012. else
  1013. pwb := GetWeigth(pbase);
  1014. if (pwb^ = nil) and (pbase = AData) then
  1015. pwb := @ADataWeigths[0].Weights;
  1016. end;
  1017. end;
  1018. end;
  1019. function InternalComputeWeights(const AList : array of TUnicodeCodePointArray) : TUCA_WeightRecArray;
  1020. var
  1021. kral : Integer;
  1022. kres : TUCA_WeightRecArray;
  1023. procedure EnsureResultLength(const APlus : Integer);//inline;
  1024. begin
  1025. if ((kral+APlus) > Length(kres)) then
  1026. SetLength(kres,(2*(kral+APlus)));
  1027. end;
  1028. procedure AddToResult(const AValue : TUCA_WeightRecArray);//inline;
  1029. begin
  1030. EnsureResultLength(Length(AValue));
  1031. Move(AValue[0],kres[kral],(Length(AValue)*SizeOf(kres[0])));
  1032. kral := kral + Length(AValue);
  1033. end;
  1034. var
  1035. kc, k, ktempIndex, ki : Integer;
  1036. tmpWeight : array of TUCA_PropWeights;
  1037. begin
  1038. kc := Length(AList);
  1039. kral := 0;
  1040. SetLength(kres,(10*kc));
  1041. FillChar(kres[0],(Length(kres)*SizeOf(kres[0])),0);
  1042. for k := 0 to kc - 1 do begin
  1043. ktempIndex := IndexOf(AList[k],@r[0],ral);
  1044. if (ktempIndex <> -1) then begin
  1045. AddToResult(r[ktempIndex].Weights);
  1046. Continue;
  1047. end;
  1048. ktempIndex := IndexOf(AList[k],[],AData,ADataLen);
  1049. if (ktempIndex <> -1) then begin
  1050. if not AData[ktempIndex].Changed then begin
  1051. AddToResult(ADataWeigths[AData[ktempIndex].InitialPosition-1].Weights);
  1052. Continue;
  1053. end;
  1054. end;
  1055. if (Length(AList[k]) > 1) then begin
  1056. for ki := 0 to Length(AList[k]) - 1 do begin
  1057. ktempIndex := IndexOf([AList[k][ki]],@r[0],ral);
  1058. if (ktempIndex <> -1) then begin
  1059. AddToResult(r[ktempIndex].Weights);
  1060. Continue;
  1061. end;
  1062. ktempIndex := IndexOf([AList[k][ki]],[],AData,ADataLen);
  1063. if (ktempIndex <> -1) then begin
  1064. if not AData[ktempIndex].Changed then begin
  1065. AddToResult(ADataWeigths[AData[ktempIndex].InitialPosition-1].Weights);
  1066. Continue;
  1067. end;
  1068. end;
  1069. SetLength(tmpWeight,2);
  1070. DeriveWeight(AList[k][ki],@tmpWeight[0]);
  1071. EnsureResultLength(2);
  1072. kres[kral].Weights[0] := tmpWeight[0].Weights[0];
  1073. kres[kral].Weights[1] := tmpWeight[0].Weights[1];
  1074. kres[kral].Weights[2] := tmpWeight[0].Weights[2];
  1075. kres[kral+1].Weights[0] := tmpWeight[1].Weights[0];
  1076. kres[kral+1].Weights[1] := tmpWeight[1].Weights[1];
  1077. kres[kral+1].Weights[2] := tmpWeight[1].Weights[2];
  1078. kral := kral + 2;
  1079. tmpWeight := nil;
  1080. end
  1081. end;
  1082. SetLength(tmpWeight,2);
  1083. DeriveWeight(AList[k][0],@tmpWeight[0]);
  1084. EnsureResultLength(2);
  1085. kres[kral].Weights[0] := tmpWeight[0].Weights[0];
  1086. kres[kral].Weights[1] := tmpWeight[0].Weights[1];
  1087. kres[kral].Weights[2] := tmpWeight[0].Weights[2];
  1088. kres[kral+1].Weights[0] := tmpWeight[1].Weights[0];
  1089. kres[kral+1].Weights[1] := tmpWeight[1].Weights[1];
  1090. kres[kral+1].Weights[2] := tmpWeight[1].Weights[2];
  1091. kral := kral + 2;
  1092. tmpWeight := nil;
  1093. end;
  1094. SetLength(kres,kral);
  1095. Result := kres;
  1096. end;
  1097. procedure Handle_Expansion();
  1098. var
  1099. expChars : array[0..1] of TUnicodeCodePointArray;
  1100. kres : TUCA_WeightRecArray;
  1101. begin
  1102. expChars[0] := (p-1)^.Characters;
  1103. expChars[1] := p^.ExpansionChars;
  1104. kres := InternalComputeWeights(expChars);
  1105. if (p^.WeigthKind <= TReorderWeigthKind.Tertiary) then
  1106. Inc(kres[Length(kres)-1].Weights[Ord(p^.WeigthKind)]);
  1107. pr^.Weights := Copy(kres);
  1108. end;
  1109. var
  1110. c, ti : Integer;
  1111. q : PReorderUnit;
  1112. pw : PUCA_WeightRecArray;
  1113. begin
  1114. Result := 0;
  1115. if (ADataLen < 1) then
  1116. exit;
  1117. while True do begin
  1118. for loopIndex := 0 to 1 do begin
  1119. c := ADataLen;
  1120. ral := 0;
  1121. SetLength(r,c);
  1122. FillByte(r[0],(Length(r)*SizeOf(r[0])),0);
  1123. q := nil;
  1124. pbase := nil;
  1125. pr := @r[0];
  1126. p := AData;
  1127. i := 0;
  1128. while (i < c) do begin
  1129. if (p^.WeigthKind = TReorderWeigthKind.Deletion) then begin
  1130. SkipDeletion();
  1131. Continue;
  1132. end;
  1133. if p^.Changed then begin
  1134. actualBegin := (i = 0) or (((p-1)^.WeigthKind = TReorderWeigthKind.Deletion));
  1135. FindBaseItem();
  1136. if p^.IsExpansion() then begin
  1137. if (loopIndex = 0) then begin
  1138. Inc(p);
  1139. Inc(i);
  1140. while (i < c) do begin
  1141. if (p^.WeigthKind = TReorderWeigthKind.Primary) then
  1142. Break;
  1143. Inc(p);
  1144. Inc(i);
  1145. end;
  1146. Continue;
  1147. end;
  1148. pr^.CodePoints := Copy(p^.Characters);
  1149. Handle_Expansion();
  1150. if p^.HasContext() then
  1151. AddContext(p^.Context);
  1152. Inc(pr);
  1153. Inc(ral);
  1154. end else if actualBegin then begin
  1155. pr^.CodePoints := Copy(p^.Characters);
  1156. pw := pwb;
  1157. pr^.Weights := Copy(pw^);
  1158. if p^.HasContext() then
  1159. AddContext(p^.Context);
  1160. Inc(pr);
  1161. Inc(ral);
  1162. end else if (p^.WeigthKind = TReorderWeigthKind.Identity) then begin
  1163. pr^.CodePoints := Copy(p^.Characters);
  1164. q := p - 1;
  1165. if (q = pbase) then
  1166. pw := pwb
  1167. else
  1168. pw := @((pr-1)^.Weights);
  1169. pr^.Weights := Copy(pw^);
  1170. if p^.HasContext() then
  1171. AddContext(p^.Context);
  1172. Inc(pr);
  1173. Inc(ral);
  1174. end else begin
  1175. pr^.CodePoints := Copy(p^.Characters);
  1176. if ((p - 1) = pbase) then begin
  1177. if (p^.WeigthKind = TReorderWeigthKind.Primary) then begin
  1178. SetLength(pr^.Weights,2);
  1179. FillByte(pr^.Weights[0],(Length(pr^.Weights)*SizeOf(pr^.Weights[0])),0);
  1180. pr^.Weights[0].Weights[0] := (pwb^[0].Weights[0] + 1);
  1181. pr^.Weights[0].Variable := pwb^[0].Variable;
  1182. pr^.Weights[1] := pr^.Weights[0];
  1183. end else if (p^.WeigthKind = TReorderWeigthKind.Secondary) then begin
  1184. SetLength(pr^.Weights,2);
  1185. FillByte(pr^.Weights[0],(Length(pr^.Weights)*SizeOf(pr^.Weights[0])),0);
  1186. pr^.Weights[0].Weights[0] := pwb^[0].Weights[0];
  1187. pr^.Weights[0].Weights[1] := (pwb^[0].Weights[1] + 1);
  1188. pr^.Weights[0].Variable := pwb^[0].Variable;
  1189. pr^.Weights[1].Weights[0] := pr^.Weights[0].Weights[0];
  1190. pr^.Weights[1].Variable := pr^.Weights[0].Variable;
  1191. end else if (p^.WeigthKind = TReorderWeigthKind.Tertiary) then begin
  1192. SetLength(pr^.Weights,2);
  1193. FillByte(pr^.Weights[0],(Length(pr^.Weights)*SizeOf(pr^.Weights[0])),0);
  1194. pr^.Weights[0].Weights[0] := pwb^[0].Weights[0];
  1195. pr^.Weights[0].Weights[1] := pwb^[0].Weights[1];
  1196. pr^.Weights[0].Weights[2] := (pwb^[0].Weights[2] + 1);
  1197. pr^.Weights[0].Variable := pwb^[0].Variable;
  1198. pr^.Weights[1].Weights[0] := pr^.Weights[0].Weights[0];
  1199. pr^.Weights[1].Variable := pr^.Weights[0].Variable;
  1200. end;
  1201. end else begin
  1202. pr^.Weights := Copy((pr-1)^.Weights);
  1203. if (p^.WeigthKind = TReorderWeigthKind.Primary) then
  1204. Inc(pr^.Weights[1].Weights[Ord(p^.WeigthKind)])
  1205. else
  1206. Inc(pr^.Weights[0].Weights[Ord(p^.WeigthKind)]);
  1207. end;
  1208. if p^.HasContext() then
  1209. AddContext(p^.Context);
  1210. Inc(pr);
  1211. Inc(ral);
  1212. end;
  1213. end else begin
  1214. if (i > 0) and ((p-1)^.WeigthKind <> TReorderWeigthKind.Deletion) and (p-1)^.Changed and
  1215. (ral > 0)
  1216. then begin
  1217. pw := GetWeigth(p);
  1218. ti := CompareSortKey(SimpleFormKey((pr-1)^.Weights),SimpleFormKey(pw^));
  1219. if ( (p^.WeigthKind = TReorderWeigthKind.Identity) and (ti > 0) ) or
  1220. ( (p^.WeigthKind >= TReorderWeigthKind.Primary) and (ti >= 0) )
  1221. then begin
  1222. p^.Changed := True;
  1223. Continue;
  1224. end;
  1225. end;
  1226. pbase := nil;
  1227. pwb := nil;
  1228. end;
  1229. Inc(p);
  1230. Inc(i);
  1231. end;
  1232. end;
  1233. SetLength(r,ral);
  1234. if (MarkSuffixAsChanged(AData,ADataLen) = 0) then
  1235. Break;
  1236. end;
  1237. Compress(r,AResult);
  1238. Result := Length(AResult);
  1239. end;
  1240. function ComputeWeigths(
  1241. const AData : PReorderUnit;
  1242. const ADataLen : Integer;
  1243. const ADataWeigths : TUCA_LineRecArray;
  1244. out AResult : TUCA_LineRecArray
  1245. ) : Integer;
  1246. var
  1247. locData : array of TReorderUnit;
  1248. i, actualLength : Integer;
  1249. p : PReorderUnit;
  1250. begin
  1251. SetLength(locData,ADataLen);
  1252. actualLength := 0;
  1253. p := AData;
  1254. for i := 0 to ADataLen-1 do begin
  1255. if not p^.IsVirtual() then begin
  1256. locData[actualLength].Assign(p);
  1257. actualLength := actualLength+1;
  1258. end;
  1259. Inc(p);
  1260. end;
  1261. if (Length(locData) <> actualLength) then
  1262. SetLength(locData,actualLength);
  1263. Result := InternalComputeWeigths(@locData[0],actualLength,ADataWeigths,AResult);
  1264. p := AData;
  1265. for i := 0 to actualLength-1 do begin
  1266. while p^.IsVirtual() do begin
  1267. Inc(p);
  1268. end;
  1269. p^.Assign(@locData[i]);
  1270. Inc(p);
  1271. end;
  1272. end;
  1273. function FillInitialPositions(
  1274. AData : PReorderUnit;
  1275. const ADataLen : Integer;
  1276. const ADataWeigths : TUCA_LineRecArray
  1277. ) : Integer;
  1278. var
  1279. locNotFound, i, cw : Integer;
  1280. p : PReorderUnit;
  1281. pw : PUCA_LineRec;
  1282. chars : TUnicodeCodePointArray;
  1283. begin
  1284. locNotFound := 0;
  1285. cw := Length(ADataWeigths);
  1286. if (cw > 0) then
  1287. pw := @ADataWeigths[0]
  1288. else
  1289. pw := nil;
  1290. p := AData;
  1291. for i := 0 to ADataLen - 1 do begin
  1292. p^.InitialPosition := IndexOf(p^.Characters,pw,cw) + 1;
  1293. if (p^.InitialPosition = 0) then begin
  1294. chars := RemoveIgnorables(p^.Characters,pw,cw);
  1295. p^.InitialPosition := IndexOf(chars,pw,cw) + 1;
  1296. end;
  1297. if (p^.InitialPosition = 0) then
  1298. Inc(locNotFound);
  1299. Inc(p);
  1300. end;
  1301. Result := locNotFound;
  1302. end;
  1303. { TCldrImportList }
  1304. function TCldrImportList.GetCount: Integer;
  1305. begin
  1306. Result := Length(FItems);
  1307. end;
  1308. function TCldrImportList.GetItem(AIndex : Integer): TCldrImport;
  1309. begin
  1310. if (AIndex < 0) or (AIndex >= Length(FItems)) then
  1311. raise ERangeError.CreateFmt(SListIndexError,[AIndex]);
  1312. Result := FItems[AIndex];
  1313. end;
  1314. destructor TCldrImportList.Destroy();
  1315. begin
  1316. Clear();
  1317. inherited;
  1318. end;
  1319. procedure TCldrImportList.Clear();
  1320. var
  1321. i : Integer;
  1322. begin
  1323. for i := Low(FItems) to High(FItems) do
  1324. FreeAndNil(FItems[i]);
  1325. SetLength(FItems,0);
  1326. end;
  1327. function TCldrImportList.IndexOf(const ASource, AType: string): Integer;
  1328. var
  1329. i : Integer;
  1330. begin
  1331. for i := Low(FItems) to High(FItems) do begin
  1332. if (FItems[i].Source = ASource) and (FItems[i].TypeName = AType) then begin
  1333. Result := i;
  1334. exit;
  1335. end;
  1336. end;
  1337. Result := -1;
  1338. end;
  1339. function TCldrImportList.Find(const ASource, AType: string): TCldrImport;
  1340. var
  1341. i : Integer;
  1342. begin
  1343. i := IndexOf(ASource,AType);
  1344. if (i >= 0) then
  1345. Result := FItems[i]
  1346. else
  1347. Result := nil;
  1348. end;
  1349. function TCldrImportList.Add(const ASource, AType: string): TCldrImport;
  1350. var
  1351. i : Integer;
  1352. begin
  1353. i := IndexOf(ASource,AType);
  1354. if (i >= 0) then begin
  1355. Result := FItems[i];
  1356. end else begin
  1357. Result := TCldrImport.Create();
  1358. Result.FSource := ASource;
  1359. Result.FTypeName := AType;
  1360. i := Length(FItems);
  1361. SetLength(FItems,(i+1));
  1362. FItems[i] := Result;
  1363. end;
  1364. end;
  1365. { TCldrCollationRepository }
  1366. function TCldrCollationRepository.GetItem(const AIndex : Integer): TCldrCollation;
  1367. begin
  1368. if (AIndex < 0) or (AIndex >= Length(FItems)) then
  1369. raise ERangeError.CreateFmt(SListIndexError,[AIndex]);
  1370. Result := FItems[AIndex];
  1371. end;
  1372. function TCldrCollationRepository.GetItemCount: Integer;
  1373. begin
  1374. Result := Length(FItems);
  1375. end;
  1376. function TCldrCollationRepository.IndexOfItem(AItem: TCldrCollation): Integer;
  1377. var
  1378. i : Integer;
  1379. begin
  1380. for i := Low(FItems) to High(FItems) do begin
  1381. if (FItems[i] = AItem) then begin
  1382. Result := i;
  1383. exit;
  1384. end;
  1385. end;
  1386. Result := -1;
  1387. end;
  1388. procedure TCldrCollationRepository.Add(AItem: TCldrCollation);
  1389. var
  1390. i : Integer;
  1391. begin
  1392. if (AItem = nil) then
  1393. raise EArgumentException.CreateFmt(SParamIsNil,['AItem: TCldrCollation']);
  1394. if (IndexOfItem(AItem) >= 0) then
  1395. raise EArgumentException.CreateFmt(sCollationsExistsAlready,[AItem.Language]);
  1396. i := Length(FItems);
  1397. SetLength(FItems,(i+1));
  1398. AItem.FRepository := Self;
  1399. FItems[i] := AItem;
  1400. end;
  1401. constructor TCldrCollationRepository.Create(ALoader: ICldrCollationLoader);
  1402. begin
  1403. if (ALoader = nil) then
  1404. raise EArgumentException.CreateFmt(SInvalidPropertyElement,['Loader']);
  1405. SetLoader(ALoader);
  1406. end;
  1407. destructor TCldrCollationRepository.Destroy;
  1408. begin
  1409. Clear();
  1410. inherited Destroy;
  1411. end;
  1412. procedure TCldrCollationRepository.FreeItems();
  1413. var
  1414. i : Integer;
  1415. begin
  1416. for i := 0 to Length(FItems) - 1 do
  1417. FreeAndNil(FItems[i]);
  1418. SetLength(FItems,0);
  1419. end;
  1420. procedure TCldrCollationRepository.Clear();
  1421. begin
  1422. FreeItems();
  1423. end;
  1424. procedure TCldrCollationRepository.SetLoader(AValue: ICldrCollationLoader);
  1425. begin
  1426. if (FLoader <> AValue) then
  1427. FLoader := AValue;
  1428. end;
  1429. function TCldrCollationRepository.IndexOf(const ALanguage: string): Integer;
  1430. var
  1431. i : Integer;
  1432. begin
  1433. for i := Low(FItems) to High(FItems) do begin
  1434. if (FItems[i].Language = ALanguage) then begin
  1435. Result := i;
  1436. exit;
  1437. end
  1438. end;
  1439. Result := -1;
  1440. end;
  1441. function TCldrCollationRepository.Find(const ALanguage: string): TCldrCollation;
  1442. var
  1443. i : Integer;
  1444. begin
  1445. i := IndexOf(ALanguage);
  1446. if (i >= 0) then
  1447. Result := FItems[i]
  1448. else
  1449. Result := nil;
  1450. end;
  1451. function TCldrCollationRepository.Load(
  1452. const ALanguage : string;
  1453. const AMode : TCldrParserMode
  1454. ) : TCldrCollation;
  1455. var
  1456. isnew : Boolean;
  1457. begin
  1458. Result := Find(ALanguage);
  1459. if (Result <> nil) then begin
  1460. if (Result.Mode = TCldrParserMode.FullParsing) or (Result.Mode = AMode) then
  1461. exit;
  1462. end;
  1463. isnew := (Result = nil);
  1464. if isnew then
  1465. Result := TCldrCollation.Create();
  1466. try
  1467. Loader.LoadCollation(ALanguage,Result,AMode);
  1468. Add(Result);
  1469. except
  1470. if isnew then
  1471. FreeAndNil(Result);
  1472. raise;
  1473. end;
  1474. end;
  1475. { TCldrCollationItem }
  1476. constructor TCldrCollationItem.Create;
  1477. begin
  1478. FImports := TCldrImportList.Create();
  1479. end;
  1480. destructor TCldrCollationItem.Destroy;
  1481. begin
  1482. FImports.Free();
  1483. inherited Destroy;
  1484. end;
  1485. procedure TCldrCollationItem.Clear();
  1486. begin
  1487. FBackwards := False;
  1488. FBase := '';
  1489. FChangedFields := [];
  1490. SetLength(FRules,0);
  1491. FTypeName := '';
  1492. FImports.Clear();
  1493. end;
  1494. function TCldrCollationItem.IsPrivate() : Boolean;
  1495. begin
  1496. Result := (Pos('private-',TypeName) = 1);
  1497. end;
  1498. { TCldrCollation }
  1499. function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;
  1500. begin
  1501. if (Index < 0) or (Index >= Length(FItems)) then
  1502. raise ERangeError.CreateFmt(SListIndexError,[Index]);
  1503. Result := FItems[Index];
  1504. end;
  1505. function TCldrCollation.GetItemCount: Integer;
  1506. begin
  1507. Result := Length(FItems);
  1508. end;
  1509. destructor TCldrCollation.Destroy;
  1510. begin
  1511. Clear();
  1512. inherited Destroy;
  1513. end;
  1514. procedure TCldrCollation.Clear();
  1515. var
  1516. i : Integer;
  1517. begin
  1518. for i := 0 to Length(FItems) - 1 do
  1519. FreeAndNil(FItems[i]);
  1520. SetLength(FItems,0);
  1521. FLocalID := '';
  1522. FDefaultType := '';
  1523. end;
  1524. function TCldrCollation.IndexOf(const AItemName: string): Integer;
  1525. var
  1526. i : Integer;
  1527. begin
  1528. for i := 0 to ItemCount - 1 do begin
  1529. if SameText(AItemName,Items[i].TypeName) then
  1530. exit(i);
  1531. end;
  1532. Result := -1;
  1533. end;
  1534. function TCldrCollation.IndexOf(const AItemName, AItemAlt: string): Integer;
  1535. var
  1536. i : Integer;
  1537. begin
  1538. for i := 0 to ItemCount - 1 do begin
  1539. if SameText(AItemName,Items[i].TypeName) and
  1540. SameText(AItemAlt,Items[i].Alt)
  1541. then begin
  1542. exit(i);
  1543. end;
  1544. end;
  1545. Result := -1;
  1546. end;
  1547. function TCldrCollation.Find(const AItemName: string): TCldrCollationItem;
  1548. var
  1549. i : Integer;
  1550. begin
  1551. i := IndexOf(AItemName);
  1552. if (i = - 1) then
  1553. Result := nil
  1554. else
  1555. Result := Items[i];
  1556. end;
  1557. function TCldrCollation.Find(const AItemName, AItemAlt: string): TCldrCollationItem;
  1558. var
  1559. i : Integer;
  1560. begin
  1561. i := IndexOf(AItemName,AItemAlt);
  1562. if (i = - 1) then
  1563. Result := nil
  1564. else
  1565. Result := Items[i];
  1566. end;
  1567. function TCldrCollation.Add(AItem: TCldrCollationItem): Integer;
  1568. begin
  1569. Result := Length(FItems);
  1570. SetLength(FItems,(Result+1));
  1571. FItems[Result] := AItem;
  1572. AItem.FParent := Self;
  1573. end;
  1574. function TCldrCollation.FindPublicItemCount() : Integer;
  1575. var
  1576. r, i : Integer;
  1577. begin
  1578. r := 0;
  1579. for i := 0 to ItemCount-1 do begin
  1580. if not Items[i].IsPrivate() then
  1581. r := r+1;
  1582. end;
  1583. Result := r;
  1584. end;
  1585. { TReorderSequence }
  1586. procedure TReorderSequence.Clear();
  1587. begin
  1588. Reset := nil;
  1589. Elements := nil;
  1590. LogicalPosition := TReorderLogicalReset(0);
  1591. Before := False;
  1592. end;
  1593. procedure TReorderSequence.SetElementCount(const ALength: Integer);
  1594. begin
  1595. SetLength(Elements,ALength);
  1596. end;
  1597. procedure TReorderSequence.Assign(ASource: PReorderSequence);
  1598. var
  1599. c, i : Integer;
  1600. begin
  1601. if (ASource = nil) then begin
  1602. Self.Clear();
  1603. exit;
  1604. end;
  1605. Self.Reset := Copy(ASource^.Reset);
  1606. c := Length(ASource^.Elements);
  1607. SetLength(Self.Elements,c);
  1608. for i := 0 to c-1 do
  1609. Self.Elements[i].Assign(@ASource^.Elements[i]);
  1610. Self.Before := ASource^.Before;
  1611. end;
  1612. { TReorderUnit }
  1613. function TReorderUnit.IsVirtual() : Boolean;
  1614. begin
  1615. Result := (FVirtualPosition > TReorderLogicalReset.None);
  1616. end;
  1617. class function TReorderUnit.From(
  1618. const AChars,
  1619. AContext : array of TUnicodeCodePoint;
  1620. const AWeigthKind : TReorderWeigthKind;
  1621. const AInitialPosition : Integer
  1622. ) : TReorderUnit;
  1623. var
  1624. c : Integer;
  1625. begin
  1626. Result.Clear();
  1627. c := Length(AChars);
  1628. SetLength(Result.Characters,c);
  1629. if (c > 0) then
  1630. Move(AChars[0],Result.Characters[0],(c*SizeOf(Result.Characters[0])));
  1631. Result.WeigthKind := AWeigthKind;
  1632. Result.InitialPosition := AInitialPosition;
  1633. Result.Changed := False;
  1634. c := Length(AContext);
  1635. SetLength(Result.Context,c);
  1636. if (c > 0) then
  1637. Move(AContext[0],Result.Context[0],(c*SizeOf(Result.Context[0])));
  1638. end;
  1639. class function TReorderUnit.From(
  1640. const AChars : array of TUnicodeCodePoint;
  1641. const AWeigthKind : TReorderWeigthKind;
  1642. const AInitialPosition : Integer
  1643. ) : TReorderUnit;
  1644. begin
  1645. Result := From(AChars,[],AWeigthKind,AInitialPosition);
  1646. end;
  1647. class function TReorderUnit.From(
  1648. const AChar : TUnicodeCodePoint;
  1649. const AWeigthKind : TReorderWeigthKind;
  1650. const AInitialPosition : Integer
  1651. ) : TReorderUnit;
  1652. begin
  1653. Result := From([AChar],AWeigthKind,AInitialPosition);
  1654. end;
  1655. class function TReorderUnit.From(
  1656. const AChar : TUnicodeCodePoint;
  1657. const AContext : array of TUnicodeCodePoint;
  1658. const AWeigthKind : TReorderWeigthKind;
  1659. const AInitialPosition : Integer
  1660. ) : TReorderUnit;
  1661. begin
  1662. Result := From([AChar],AContext,AWeigthKind,AInitialPosition);
  1663. end;
  1664. class function TReorderUnit.From(const AReset: TReorderLogicalReset): TReorderUnit;
  1665. begin
  1666. Result.Clear();
  1667. Result.FVirtualPosition := AReset;
  1668. end;
  1669. procedure TReorderUnit.SetExpansion(const AChars: array of TUnicodeCodePoint);
  1670. var
  1671. c : Integer;
  1672. begin
  1673. if IsVirtual() then
  1674. raise ECldrException.Create(sVirtualIsReadOnly);
  1675. c := Length(AChars);
  1676. SetLength(ExpansionChars,c);
  1677. if (c > 0) then
  1678. Move(AChars[0],ExpansionChars[0],(c*SizeOf(AChars[0])));
  1679. end;
  1680. procedure TReorderUnit.SetExpansion(const AChar: TUnicodeCodePoint);
  1681. begin
  1682. if IsVirtual() then
  1683. raise ECldrException.Create(sVirtualIsReadOnly);
  1684. SetExpansion([AChar]);
  1685. end;
  1686. procedure TReorderUnit.Clear();
  1687. begin
  1688. Self.FVirtualPosition := TReorderLogicalReset(0);
  1689. Self.Characters := nil;
  1690. Self.Context := nil;
  1691. Self.ExpansionChars := nil;
  1692. Self.InitialPosition := 0;
  1693. Self.WeigthKind := TReorderWeigthKind(0);
  1694. Self.Changed := False;
  1695. end;
  1696. procedure TReorderUnit.Assign(const AItem : PReorderUnit);
  1697. begin
  1698. Clear();
  1699. if (AItem <> nil) then begin
  1700. Self.FVirtualPosition := AItem^.VirtualPosition;
  1701. Self.Characters := Copy(AItem^.Characters);
  1702. //SetLength(Self.Context,Length(AItem^.Context));
  1703. Self.Context := Copy(AItem^.Context);
  1704. Self.ExpansionChars := Copy(AItem^.ExpansionChars);
  1705. Self.WeigthKind := AItem^.WeigthKind;
  1706. Self.InitialPosition := AItem^.InitialPosition;
  1707. Self.Changed := AItem^.Changed;
  1708. end;
  1709. end;
  1710. function TReorderUnit.HasContext() : Boolean;
  1711. begin
  1712. Result := (Length(Context) > 0);
  1713. end;
  1714. function TReorderUnit.IsExpansion() : Boolean;
  1715. begin
  1716. Result := (Length(ExpansionChars) > 0);
  1717. end;
  1718. { TOrderedCharacters }
  1719. procedure TOrderedCharacters.EnsureSize(const AMinSize : Integer);
  1720. var
  1721. c : Integer;
  1722. begin
  1723. if (AMinSize > Length(Data)) then begin
  1724. if (AMinSize > 1000) then
  1725. c := AMinSize + 100
  1726. else
  1727. c := (3*AMinSize) div 2 ;
  1728. SetLength(Data,c);
  1729. end;
  1730. FActualLength := AMinSize;
  1731. end;
  1732. class function TOrderedCharacters.Create(const ACapacity : Integer) : TOrderedCharacters;
  1733. begin
  1734. if (ACapacity < 0) then
  1735. raise ERangeError.Create(SRangeError);
  1736. Result.FActualLength := 0;
  1737. SetLength(Result.Data,ACapacity);
  1738. end;
  1739. class function TOrderedCharacters.Create() : TOrderedCharacters;
  1740. begin
  1741. Result := Create(0);
  1742. end;
  1743. procedure TOrderedCharacters.Clear;
  1744. begin
  1745. Data := nil;
  1746. FActualLength := 0;
  1747. end;
  1748. function TOrderedCharacters.Clone() : TOrderedCharacters;
  1749. var
  1750. i : Integer;
  1751. begin
  1752. Result.Clear();
  1753. SetLength(Result.Data,Self.ActualLength);
  1754. for i := 0 to Length(Result.Data) - 1 do
  1755. Result.Data[i].Assign(@Self.Data[i]);
  1756. Result.FActualLength := Self.FActualLength;
  1757. end;
  1758. function TOrderedCharacters.Insert(
  1759. const AItem : TReorderUnit;
  1760. const ADestPos : Integer
  1761. ) : Integer;
  1762. var
  1763. k, finalPos : Integer;
  1764. p : PReorderUnit;
  1765. i, c : Integer;
  1766. begin
  1767. if (ActualLength=0) then begin
  1768. EnsureSize(ActualLength + 1);
  1769. p := @Data[0];
  1770. p^.Assign(@AItem);
  1771. p^.Changed := True;
  1772. exit(0);
  1773. end;
  1774. k := IndexOf(AItem.Characters,AItem.Context,@Data[0],ActualLength);
  1775. if (k = ADestPos) then begin
  1776. Data[ADestPos].Assign(@AItem);
  1777. Data[ADestPos].Changed := True;
  1778. exit(k);
  1779. end;
  1780. finalPos := ADestPos;
  1781. if (finalPos > ActualLength) then
  1782. finalPos := ActualLength;
  1783. c := ActualLength;
  1784. EnsureSize(ActualLength + 1);
  1785. Data[c].Clear();
  1786. p := @Data[finalPos];
  1787. if (finalPos = ActualLength) then begin
  1788. p^.Assign(@AItem);
  1789. p^.Changed := True;
  1790. end else begin
  1791. if (c > 0) then begin
  1792. p := @Data[c-1];
  1793. for i := finalPos to c - 1 do begin
  1794. Move(p^,(p+1)^,SizeOf(p^));
  1795. Dec(p);
  1796. end;
  1797. end;
  1798. p := @Data[finalPos];
  1799. {Move(
  1800. Pointer(p)^,Pointer(@p[1])^,
  1801. (ActualLength-(finalPos+1))*SizeOf(TReorderUnit)
  1802. );}
  1803. FillChar(Pointer(p)^,SizeOf(TReorderUnit),0);
  1804. p^.Assign(@AItem);
  1805. p^.Changed := True;
  1806. end;
  1807. if (k >= 0) then begin
  1808. if (k > finalPos) then
  1809. Inc(k);
  1810. Delete(k);
  1811. end;
  1812. Result := finalPos;
  1813. end;
  1814. function TOrderedCharacters.Append(const AItem : TReorderUnit) : Integer;
  1815. begin
  1816. Result := Insert(AItem,ActualLength);
  1817. end;
  1818. procedure TOrderedCharacters.Delete(const AIndex : Integer);
  1819. var
  1820. i : Integer;
  1821. p : PReorderUnit;
  1822. begin
  1823. if (AIndex < 0) or (AIndex >= ActualLength) then
  1824. raise ERangeError.CreateFmt(SListIndexError,[AIndex]);
  1825. if (AIndex = (ActualLength-1)) then begin
  1826. Data[AIndex].Clear();
  1827. end else begin
  1828. //Data[AIndex].Clear();
  1829. p := @Data[AIndex];
  1830. p^.Clear();
  1831. for i := AIndex to ActualLength-2 do begin
  1832. Move((p+1)^,p^,SizeOf(p^));
  1833. Inc(p);
  1834. end;
  1835. {Move(
  1836. Pointer(@Data[(AIndex+1)])^,Pointer(@Data[AIndex])^,
  1837. (ActualLength-(AIndex+1))*SizeOf(TReorderUnit)
  1838. );}
  1839. FillChar(Pointer(@Data[(FActualLength-1)])^,SizeOf(TReorderUnit),0);
  1840. end;
  1841. FActualLength := FActualLength - 1;
  1842. end;
  1843. procedure TOrderedCharacters.ApplyStatement(const AStatement : PReorderSequence);
  1844. begin
  1845. ApplyStatementToSequence(Self,AStatement,1);
  1846. end;
  1847. function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
  1848. begin
  1849. if (ACollation.ItemCount = 0) then
  1850. exit('');
  1851. if (ACollation.IndexOf(ACollation.DefaultType) <> -1) then
  1852. exit(ACollation.DefaultType);
  1853. Result := 'standard';
  1854. if (ACollation.IndexOf(Result) <> -1) then
  1855. exit;
  1856. Result := 'search';
  1857. if (ACollation.IndexOf(Result) <> -1) then
  1858. exit;
  1859. if (ACollation.ItemCount > 0) then
  1860. Result := ACollation.Items[0].TypeName;
  1861. end;
  1862. procedure GenerateUCA_CLDR_Head(
  1863. ADest : TStream;
  1864. ABook : PUCA_DataBook;
  1865. AProps : PUCA_PropBook;
  1866. ACollation : TCldrCollationItem
  1867. );
  1868. procedure AddLine(const ALine : ansistring);
  1869. var
  1870. buffer : ansistring;
  1871. begin
  1872. buffer := ALine + sLineBreak;
  1873. ADest.Write(buffer[1],Length(buffer));
  1874. end;
  1875. procedure AddFields();
  1876. var
  1877. kc : Integer;
  1878. e : TCollationField;
  1879. ks : string;
  1880. ti : PTypeInfo;
  1881. begin
  1882. ti := TypeInfo(TCollationField);
  1883. ks := '';
  1884. kc := 0;
  1885. for e := Low(TCollationField) to High(TCollationField) do begin
  1886. if (e in ACollation.ChangedFields) then begin
  1887. ks := ks + ti^.Name + '.' +
  1888. GetEnumName(ti,Ord(e)) + ', ';
  1889. kc := kc + 1;
  1890. end
  1891. end;
  1892. if (AProps <> nil) then begin
  1893. if (AProps^.VariableLowLimit < High(Word)) then begin
  1894. ks := ks + ti^.Name + '.' +
  1895. GetEnumName(ti,Ord(TCollationField.VariableLowLimit)) + ', ';
  1896. kc := kc + 1;
  1897. end;
  1898. if (AProps^.VariableHighLimit > 0) then begin
  1899. ks := ks + ti^.Name + '.' +
  1900. GetEnumName(ti,Ord(TCollationField.VariableHighLimit)) + ', ';
  1901. kc := kc + 1;
  1902. end;
  1903. end;
  1904. if (kc > 0) then
  1905. ks := Copy(ks,1,(Length(ks)-2));
  1906. AddLine(' UPDATED_FIELDS = [ ' + ks + ' ];');
  1907. end;
  1908. begin
  1909. AddLine('{$mode objfpc}{$H+}');
  1910. AddLine('unit ' + COLLATION_FILE_PREFIX + LowerCase(ACollation.Parent.LocalID)+ ';'+sLineBreak);
  1911. AddLine('interface'+sLineBreak);
  1912. AddLine('implementation');
  1913. AddLine('uses');
  1914. AddLine(' unicodedata, unicodeducet;'+sLineBreak);
  1915. AddLine('const');
  1916. AddFields();
  1917. AddLine(' COLLATION_NAME = ' + QuotedStr(ACollation.Parent.Language) + ';');
  1918. AddLine(' BASE_COLLATION = ' + QuotedStr(ACollation.Base) + ';');
  1919. AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
  1920. if (AProps <> nil) then begin
  1921. AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
  1922. AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
  1923. AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
  1924. end else begin
  1925. AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(High(Word)) + ';');
  1926. AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(0) + ';');
  1927. AddLine(' VARIABLE_WEIGHT = ' + IntToStr(0) + ';');
  1928. end;
  1929. AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
  1930. AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
  1931. AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
  1932. AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
  1933. if (AProps <> nil) then
  1934. AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
  1935. AddLine('');
  1936. end;
  1937. procedure GenerateUCA_CLDR_Registration(
  1938. ADest : TStream;
  1939. ABook : PUCA_DataBook
  1940. );
  1941. procedure AddLine(const ALine : ansistring);
  1942. var
  1943. buffer : ansistring;
  1944. begin
  1945. buffer := ALine + sLineBreak;
  1946. ADest.Write(buffer[1],Length(buffer));
  1947. end;
  1948. begin
  1949. AddLine('var');
  1950. AddLine(' CLDR_Collation : TUCA_DataBook = (');
  1951. AddLine(' Base : nil;');
  1952. AddLine(' Version : VERSION_STRING;');
  1953. AddLine(' CollationName : COLLATION_NAME;');
  1954. AddLine(' VariableWeight : TUCA_VariableKind(VARIABLE_WEIGHT);');
  1955. AddLine(' Backwards : (BACKWARDS_0,BACKWARDS_1,BACKWARDS_2,BACKWARDS_3);');
  1956. if (Length(ABook^.Lines) > 0) then begin
  1957. AddLine(' BMP_Table1 : @UCA_TABLE_1[0];');
  1958. AddLine(' BMP_Table2 : @UCA_TABLE_2[0];');
  1959. AddLine(' OBMP_Table1 : @UCAO_TABLE_1[0];');
  1960. AddLine(' OBMP_Table2 : @UCAO_TABLE_2[0];');
  1961. AddLine(' PropCount : PROP_COUNT;');
  1962. AddLine(' Props : PUCA_PropItemRec(@UCA_PROPS[0]);');
  1963. end else begin
  1964. AddLine(' BMP_Table1 : nil;');
  1965. AddLine(' BMP_Table2 : nil;');
  1966. AddLine(' OBMP_Table1 : nil;');
  1967. AddLine(' OBMP_Table2 : nil;');
  1968. AddLine(' PropCount : 0;');
  1969. AddLine(' Props : nil;');
  1970. end;
  1971. AddLine(' VariableLowLimit : VARIABLE_LOW_LIMIT;');
  1972. AddLine(' VariableHighLimit : VARIABLE_HIGH_LIMIT;');
  1973. AddLine(' );');
  1974. AddLine('');
  1975. AddLine('procedure Register();');
  1976. AddLine('begin');
  1977. AddLine(' PrepareCollation(@CLDR_Collation,BASE_COLLATION,UPDATED_FIELDS);');
  1978. AddLine(' RegisterCollation(@CLDR_Collation);');
  1979. AddLine('end;');
  1980. AddLine('');
  1981. AddLine('initialization');
  1982. AddLine(' Register();');
  1983. AddLine('');
  1984. AddLine('finalization');
  1985. AddLine(' UnregisterCollation(COLLATION_NAME);');
  1986. AddLine('');
  1987. AddLine('end.');
  1988. end;
  1989. procedure CheckEndianTransform(const ASource : PUCA_PropBook);
  1990. var
  1991. x, y : array of Byte;
  1992. px, py : PUCA_PropItemRec;
  1993. begin
  1994. if (ASource = nil) or (ASource^.ItemSize = 0) then
  1995. exit;
  1996. SetLength(x,ASource^.ItemSize);
  1997. px := PUCA_PropItemRec(@x[0]);
  1998. ReverseFromNativeEndian(ASource^.Items,ASource^.ItemSize,px);
  1999. SetLength(y,ASource^.ItemSize);
  2000. py := PUCA_PropItemRec(@y[0]);
  2001. ReverseToNativeEndian(px,ASource^.ItemSize,py);
  2002. if not CompareMem(ASource^.Items,@y[0],Length(x)) then
  2003. CompareProps(ASource^.Items, PUCA_PropItemRec(@y[0]),ASource^.ItemSize);
  2004. end;
  2005. function ForEachRule(
  2006. ACollationType : TCldrCollationItem;
  2007. AVisitFunc : TRuleVisiterFunction;
  2008. ACustomData : Pointer
  2009. ) : Boolean;
  2010. var
  2011. i : Integer;
  2012. locImport : TCldrImport;
  2013. locRep : TCldrCollationRepository;
  2014. locCollation : TCldrCollation;
  2015. locType : TCldrCollationItem;
  2016. locRules : TReorderSequenceArray;
  2017. begin
  2018. Result := False;
  2019. if not Assigned(AVisitFunc) then
  2020. exit;
  2021. if (ACollationType.Imports.Count > 0) then begin
  2022. locRep := ACollationType.Parent.Repository;
  2023. for i := 0 to ACollationType.Imports.Count-1 do begin
  2024. locImport := ACollationType.Imports[i];
  2025. locCollation := locRep.Load(locImport.Source,TCldrParserMode.FullParsing);
  2026. locType := locCollation.Find(locImport.TypeName);
  2027. if (locType = nil) then
  2028. raise ECldrException.CreateFmt(sCollationTypeNotFound,[locImport.TypeName]);
  2029. if not ForEachRule(locType,AVisitFunc,ACustomData) then
  2030. exit;
  2031. end;
  2032. end;
  2033. locRules := ACollationType.Rules;
  2034. for i := Low(locRules) to High(locRules) do begin
  2035. if not AVisitFunc(@locRules[i],ACollationType,ACustomData) then
  2036. exit;
  2037. end;
  2038. Result := True;
  2039. end;
  2040. procedure GenerateCdlrCollation(
  2041. ACollation : TCldrCollation;
  2042. AItemName : string;
  2043. AStoreName : string;
  2044. AStream,
  2045. ANativeEndianStream,
  2046. AOtherEndianStream,
  2047. ABinaryNativeEndianStream,
  2048. ABinaryOtherEndianStream : TStream;
  2049. ARootChars : TOrderedCharacters;
  2050. ARootWeigths : TUCA_LineRecArray
  2051. );
  2052. procedure AddLine(const ALine : ansistring; ADestStream : TStream);
  2053. var
  2054. buffer : ansistring;
  2055. begin
  2056. buffer := ALine + sLineBreak;
  2057. ADestStream.Write(buffer[1],Length(buffer));
  2058. end;
  2059. var
  2060. locUcaBook : TUCA_DataBook;
  2061. locSequence : TOrderedCharacters;
  2062. locItem : TCldrCollationItem;
  2063. i : Integer;
  2064. locUcaProps : PUCA_PropBook;
  2065. ucaFirstTable : TucaBmpFirstTable;
  2066. ucaSecondTable : TucaBmpSecondTable;
  2067. ucaoFirstTable : TucaoBmpFirstTable;
  2068. ucaoSecondTable : TucaOBmpSecondTable;
  2069. locHasProps : Boolean;
  2070. s : string;
  2071. serializedHeader : TSerializedCollationHeader;
  2072. e : TCollationField;
  2073. begin
  2074. locItem := ACollation.Find(AItemName);
  2075. if (locItem = nil) then
  2076. raise Exception.CreateFmt('Collation Item not found : "%s".',[AItemName]);
  2077. locSequence := ARootChars.Clone();
  2078. for i := 0 to Length(locItem.Rules) - 1 do
  2079. locSequence.ApplyStatement(@locItem.Rules[i]);
  2080. FillChar(locUcaBook,SizeOf(locUcaBook),0);
  2081. locUcaBook.Version := ACollation.Version;
  2082. locUcaBook.Backwards[1] := locItem.Backwards;
  2083. ComputeWeigths(@locSequence.Data[0],locSequence.ActualLength,ARootWeigths,locUcaBook.Lines);
  2084. for i := 0 to Length(locUcaBook.Lines) - 1 do
  2085. locUcaBook.Lines[i].Stored := True;
  2086. locHasProps := (Length(locUcaBook.Lines) > 0);
  2087. if not locHasProps then
  2088. locUcaProps := nil
  2089. else
  2090. MakeUCA_Props(@locUcaBook,locUcaProps);
  2091. try
  2092. CheckEndianTransform(locUcaProps);
  2093. if locHasProps then begin
  2094. MakeUCA_BmpTables(ucaFirstTable,ucaSecondTable,locUcaProps);
  2095. SetLength(ucaoSecondTable,100);
  2096. MakeUCA_OBmpTables(ucaoFirstTable,ucaoSecondTable,locUcaProps);
  2097. end;
  2098. GenerateLicenceText(AStream);
  2099. GenerateUCA_CLDR_Head(AStream,@locUcaBook,locUcaProps,locItem);
  2100. if locHasProps then begin
  2101. GenerateUCA_BmpTables(AStream,ANativeEndianStream,AOtherEndianStream,ucaFirstTable,ucaSecondTable);
  2102. GenerateUCA_OBmpTables(AStream,ANativeEndianStream,AOtherEndianStream,ucaoFirstTable,ucaoSecondTable);
  2103. GenerateUCA_PropTable(ANativeEndianStream,locUcaProps,ENDIAN_NATIVE);
  2104. GenerateUCA_PropTable(AOtherEndianStream,locUcaProps,ENDIAN_NON_NATIVE);
  2105. AddLine('{$ifdef FPC_LITTLE_ENDIAN}',AStream);
  2106. s := GenerateEndianIncludeFileName(AStoreName,ekLittle);
  2107. AddLine(Format(' {$include %s}',[ExtractFileName(s)]),AStream);
  2108. AddLine('{$else FPC_LITTLE_ENDIAN}',AStream);
  2109. s := GenerateEndianIncludeFileName(AStoreName,ekBig);
  2110. AddLine(Format(' {$include %s}',[ExtractFileName(s)]),AStream);
  2111. AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream);
  2112. end;
  2113. GenerateUCA_CLDR_Registration(AStream,@locUcaBook);
  2114. FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
  2115. serializedHeader.Base := locItem.Base;
  2116. serializedHeader.Version := ACollation.Version;
  2117. serializedHeader.CollationName := ACollation.Language;
  2118. serializedHeader.VariableWeight := Ord(locUcaBook.VariableWeight);
  2119. SetBit(serializedHeader.Backwards,0,locUcaBook.Backwards[0]);
  2120. SetBit(serializedHeader.Backwards,1,locUcaBook.Backwards[1]);
  2121. SetBit(serializedHeader.Backwards,2,locUcaBook.Backwards[2]);
  2122. SetBit(serializedHeader.Backwards,3,locUcaBook.Backwards[3]);
  2123. if locHasProps then begin
  2124. serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
  2125. serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
  2126. (Length(ucaSecondTable) * SizeOf(UInt24));
  2127. serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
  2128. serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
  2129. (Length(ucaoSecondTable) * SizeOf(UInt24));
  2130. serializedHeader.PropCount := locUcaProps^.ItemSize;
  2131. serializedHeader.VariableLowLimit := locUcaProps^.VariableLowLimit;
  2132. serializedHeader.VariableHighLimit := locUcaProps^.VariableHighLimit;
  2133. end else begin
  2134. serializedHeader.VariableLowLimit := High(Word);
  2135. serializedHeader.VariableHighLimit := 0;
  2136. end;
  2137. serializedHeader.ChangedFields := 0;
  2138. for e := Low(TCollationField) to High(TCollationField) do begin
  2139. if (e in locItem.ChangedFields) then
  2140. SetBit(serializedHeader.ChangedFields,Ord(e),True);
  2141. end;
  2142. ABinaryNativeEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
  2143. ReverseRecordBytes(serializedHeader);
  2144. ABinaryOtherEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
  2145. if locHasProps then begin
  2146. GenerateBinaryUCA_BmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaFirstTable,ucaSecondTable);
  2147. GenerateBinaryUCA_OBmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaoFirstTable,ucaoSecondTable);
  2148. GenerateBinaryUCA_PropTable(ABinaryNativeEndianStream,ABinaryOtherEndianStream,locUcaProps);
  2149. end;
  2150. finally
  2151. locSequence.Clear();
  2152. FreeUcaBook(locUcaProps);
  2153. end;
  2154. end;
  2155. end.