cldrhelper.pas 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709
  1. { CLDR collation helper unit.
  2. Copyright (c) 2013 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. unit cldrhelper;
  17. {$mode objfpc}
  18. {$H+}
  19. {$PACKENUM 1}
  20. {$modeswitch advancedrecords}
  21. {$scopedenums on}
  22. {$typedaddress on}
  23. interface
  24. uses
  25. SysUtils, Classes, helper;
  26. const
  27. COLLATION_FILE_PREFIX = 'collation_';
  28. type
  29. TUCA_LineRecArray = array of TUCA_LineRec;
  30. //----------------------------------------------------
  31. ECldrException = class(Exception)
  32. end;
  33. TReorderWeigthKind = (
  34. Primary, Secondary, Tertiary, Identity, Deletion
  35. );
  36. TReorderWeigthKinds = set of TReorderWeigthKind;
  37. TReorderLogicalReset = (
  38. None,// FirstVariable, LastVariable,
  39. FirstTertiaryIgnorable, LastTertiaryIgnorable,
  40. FirstSecondaryIgnorable, LastSecondaryIgnorable,
  41. FirstPrimaryIgnorable, LastPrimaryIgnorable,
  42. LastRegular,
  43. FirstNonIgnorable, LastNonIgnorable,
  44. FirstTrailing, LastTrailing
  45. );
  46. TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
  47. TCollationFields = set of TCollationField;
  48. { TReorderUnit }
  49. TReorderUnit = packed record
  50. public
  51. Context : TUnicodeCodePointArray;
  52. ExpansionChars : TUnicodeCodePointArray;
  53. Characters : TUnicodeCodePointArray;
  54. WeigthKind : TReorderWeigthKind;
  55. InitialPosition : Integer;
  56. Changed : Boolean;
  57. public
  58. class function From(
  59. const AChars,
  60. AContext : array of TUnicodeCodePoint;
  61. const AWeigthKind : TReorderWeigthKind;
  62. const AInitialPosition : Integer
  63. ) : TReorderUnit;static;overload;
  64. class function From(
  65. const AChars : array of TUnicodeCodePoint;
  66. const AWeigthKind : TReorderWeigthKind;
  67. const AInitialPosition : Integer
  68. ) : TReorderUnit;static;overload;
  69. class function From(
  70. const AChar : TUnicodeCodePoint;
  71. const AWeigthKind : TReorderWeigthKind;
  72. const AInitialPosition : Integer
  73. ) : TReorderUnit;static;overload;
  74. class function From(
  75. const AChar : TUnicodeCodePoint;
  76. const AContext : array of TUnicodeCodePoint;
  77. const AWeigthKind : TReorderWeigthKind;
  78. const AInitialPosition : Integer
  79. ) : TReorderUnit;static;overload;
  80. procedure SetExpansion(const AChars : array of TUnicodeCodePoint);
  81. procedure SetExpansion(const AChar : TUnicodeCodePoint);
  82. procedure Clear();
  83. procedure Assign(const AItem : TReorderUnit);
  84. function HasContext() : Boolean;
  85. function IsExpansion() : Boolean;
  86. end;
  87. PReorderUnit = ^TReorderUnit;
  88. { TReorderSequence }
  89. TReorderSequence = packed record
  90. public
  91. Reset : array of TUnicodeCodePoint;
  92. Elements : array of TReorderUnit;
  93. LogicalPosition : TReorderLogicalReset;
  94. Before : Boolean;
  95. public
  96. procedure Clear();
  97. end;
  98. PReorderSequence = ^TReorderSequence;
  99. TReorderSequenceArray = array of TReorderSequence;
  100. { TOrderedCharacters }
  101. TOrderedCharacters = record
  102. private
  103. FActualLength : Integer;
  104. private
  105. procedure EnsureSize(const AMinSize : Integer);
  106. public
  107. Data : array of TReorderUnit;
  108. property ActualLength : Integer read FActualLength;
  109. public
  110. class function Create(const ACapacity : Integer) : TOrderedCharacters;static;overload;
  111. class function Create() : TOrderedCharacters;static;overload;
  112. procedure Clear();
  113. function Clone() : TOrderedCharacters;
  114. function Insert(const AItem : TReorderUnit; const ADestPos : Integer) : Integer;
  115. function Append(const AItem : TReorderUnit) : Integer;
  116. procedure Delete(const AIndex : Integer);
  117. procedure ApplyStatement(const AStatement : PReorderSequence);
  118. end;
  119. POrderedCharacters = ^TOrderedCharacters;
  120. TCldrCollation = class;
  121. { TCldrCollationItem }
  122. TCldrCollationItem = class
  123. private
  124. FBackwards: Boolean;
  125. FBase: string;
  126. FChangedFields: TCollationFields;
  127. FParent: TCldrCollation;
  128. FRules: TReorderSequenceArray;
  129. FTypeName: string;
  130. public
  131. procedure Clear();
  132. property Parent : TCldrCollation read FParent;
  133. property TypeName : string read FTypeName write FTypeName;
  134. property Base : string read FBase write FBase;
  135. property Backwards : Boolean read FBackwards write FBackwards;
  136. property Rules : TReorderSequenceArray read FRules write FRules;
  137. property ChangedFields : TCollationFields read FChangedFields write FChangedFields;
  138. end;
  139. { TCldrCollation }
  140. TCldrCollation = class
  141. private
  142. FItems : array of TCldrCollationItem;
  143. FLocalID: string;
  144. FDefaultType: string;
  145. FVersion: string;
  146. FLanguage: string;
  147. private
  148. function GetItem(Index : Integer): TCldrCollationItem;
  149. function GetItemCount: Integer;
  150. public
  151. destructor Destroy();override;
  152. procedure Clear();
  153. function IndexOf(const AItemName : string) : Integer;
  154. function Find(const AItemName : string) : TCldrCollationItem;
  155. function Add(AItem : TCldrCollationItem) : Integer;
  156. property Language : string read FLanguage write FLanguage;
  157. property LocalID : string read FLocalID write FLocalID;
  158. property Version : string read FVersion write FVersion;
  159. property DefaultType : string read FDefaultType write FDefaultType;
  160. property ItemCount : Integer read GetItemCount;
  161. property Items[Index : Integer] : TCldrCollationItem read GetItem;
  162. end;
  163. TCldrParserMode = (HeaderParsing, FullParsing);
  164. function ComputeWeigths(
  165. const AData : PReorderUnit;
  166. const ADataLen : Integer;
  167. const ADataWeigths : TUCA_LineRecArray;
  168. out AResult : TUCA_LineRecArray
  169. ) : Integer;
  170. function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
  171. procedure GenerateCdlrCollation(
  172. ACollation : TCldrCollation;
  173. AItemName : string;
  174. AStoreName : string;
  175. AStream,
  176. ANativeEndianStream,
  177. AOtherEndianStream : TStream;
  178. ARootChars : TOrderedCharacters;
  179. ARootWeigths : TUCA_LineRecArray
  180. );
  181. procedure GenerateUCA_CLDR_Head(
  182. ADest : TStream;
  183. ABook : PUCA_DataBook;
  184. AProps : PUCA_PropBook;
  185. ACollation : TCldrCollationItem
  186. );
  187. function FillInitialPositions(
  188. AData : PReorderUnit;
  189. const ADataLen : Integer;
  190. const ADataWeigths : TUCA_LineRecArray
  191. ) : Integer;
  192. function IndexOf(
  193. const APattern : array of TUnicodeCodePoint;
  194. const APatternContext : array of TUnicodeCodePoint;
  195. const ASequence : PReorderUnit;
  196. const ASequenceLength : Integer
  197. ) : Integer;
  198. implementation
  199. uses
  200. RtlConsts, typinfo;
  201. function ToStr(const ACharacters : array of TUnicodeCodePoint): string;
  202. var
  203. i : Integer;
  204. begin
  205. Result := '';
  206. for i := Low(ACharacters) to High(ACharacters) do begin
  207. if (ACharacters[i] > $FFFF) then
  208. Result := Result + ' ' + IntToHex(ACharacters[i],5)
  209. else
  210. Result := Result + ' ' + IntToHex(ACharacters[i],4);
  211. end;
  212. Result := Trim(Result);
  213. end;
  214. function IndexOf(
  215. const APattern : array of TUnicodeCodePoint;
  216. const APatternContext : array of TUnicodeCodePoint;
  217. const ASequence : PReorderUnit;
  218. const ASequenceLength : Integer
  219. ) : Integer;
  220. var
  221. i, lp, sizep, lengthContext, sizeContext : Integer;
  222. p : PReorderUnit;
  223. begin
  224. Result := -1;
  225. if (ASequenceLength = 0) then
  226. exit;
  227. lp := Length(APattern);
  228. if (lp = 0) then
  229. exit;
  230. sizep := lp*SizeOf(TUnicodeCodePoint);
  231. lengthContext := Length(APatternContext);
  232. sizeContext := lengthContext*SizeOf(TUnicodeCodePoint);
  233. p := ASequence;
  234. for i := 0 to ASequenceLength - 1 do begin
  235. if (Length(p^.Characters) = lp) then begin
  236. if CompareMem(@APattern[0],@p^.Characters[0],sizep) then begin
  237. if (Length(p^.Context) = lengthContext) and
  238. ( (lengthContext = 0) or
  239. CompareMem(@p^.Context[0],@APatternContext[0],sizeContext)
  240. )
  241. then begin
  242. Result := i;
  243. Break;
  244. end;
  245. end;
  246. end;
  247. Inc(p);
  248. end;
  249. end;
  250. {procedure ApplyStatementToSequence(
  251. var ASequence : TOrderedCharacters;
  252. const AStatement : PReorderSequence;
  253. const AStatementCount : Integer
  254. );
  255. var
  256. pse, pd : PReorderUnit;
  257. kr : Integer;
  258. function GetNextInsertPos() : Integer;
  259. var
  260. kk : Integer;
  261. begin
  262. if (pse^.WeigthKind = rwkDeletion) then
  263. exit(0);
  264. if (pse^.WeigthKind = rwkIdentity) then
  265. exit(kr + 1);
  266. kk := kr + 1;
  267. pd := @ASequence.Data[kk];
  268. for kk := kk to ASequence.ActualLength - 1 do begin
  269. if (pd^.WeigthKind <= pse^.WeigthKind) then
  270. exit(kk);
  271. Inc(pd);
  272. end;
  273. Result := ASequence.ActualLength;
  274. end;
  275. var
  276. locResetPos, i, k, h : Integer;
  277. pst : PReorderSequence;
  278. begin
  279. pst := AStatement;
  280. for h := 0 to AStatementCount - 1 do begin
  281. locResetPos := -1;
  282. if (Length(pst^.Reset) > 0) then begin
  283. locResetPos := IndexOf(pst^.Reset,[],@ASequence.Data[0],ASequence.ActualLength);
  284. if (locResetPos = -1) then
  285. raise ECldrException.CreateFmt('Character(s) not found in sequence : "%s".',[ToStr(pst^.Reset)]);
  286. end;
  287. pse := @pst^.Elements[0];
  288. kr := locResetPos;
  289. k := GetNextInsertPos();
  290. for i := Low(pst^.Elements) to High(pst^.Elements) do begin
  291. k := ASequence.Insert(pse^,k)+1;
  292. Inc(pse);
  293. end;
  294. Inc(pst);
  295. end;
  296. end;}
  297. function FindLogicalPos(
  298. const ASequence : POrderedCharacters;
  299. const APosition : TReorderLogicalReset
  300. ) : Integer;
  301. var
  302. i, c : Integer;
  303. p : PReorderUnit;
  304. firstPos, lastPos : Integer;
  305. begin
  306. Result := 0;
  307. if (ASequence^.ActualLength = 0) then
  308. exit;
  309. p := @ASequence^.Data[0];
  310. c := ASequence^.ActualLength;
  311. if (APosition in [TReorderLogicalReset.FirstTertiaryIgnorable, TReorderLogicalReset.LastTertiaryIgnorable])
  312. then begin
  313. firstPos := -1;
  314. for i := 0 to c - 1 do begin
  315. if (p^.WeigthKind <= TReorderWeigthKind.Tertiary) then begin
  316. firstPos := i;
  317. Break;
  318. end;
  319. Inc(p);
  320. end;
  321. if (firstPos = -1) then
  322. exit(0);
  323. if (APosition = TReorderLogicalReset.FirstTertiaryIgnorable) then
  324. exit(firstPos);
  325. if (p^.WeigthKind < TReorderWeigthKind.Tertiary) then
  326. exit(firstPos);
  327. lastPos := -1;
  328. for i := firstPos + 1 to c - 1 do begin
  329. if (p^.WeigthKind <> TReorderWeigthKind.Identity) then begin
  330. lastPos := i;
  331. Break;
  332. end;
  333. Inc(p);
  334. end;
  335. if (lastPos = -1) then
  336. exit(c);
  337. exit(lastPos);
  338. end;
  339. if (APosition in [TReorderLogicalReset.FirstSecondaryIgnorable, TReorderLogicalReset.LastSecondaryIgnorable])
  340. then begin
  341. firstPos := -1;
  342. for i := 0 to c - 1 do begin
  343. if (p^.WeigthKind <= TReorderWeigthKind.Secondary) then begin
  344. firstPos := i;
  345. Break;
  346. end;
  347. Inc(p);
  348. end;
  349. if (firstPos = -1) then
  350. exit(0);
  351. if (APosition = TReorderLogicalReset.FirstSecondaryIgnorable) then
  352. exit(firstPos);
  353. if (p^.WeigthKind < TReorderWeigthKind.Secondary) then
  354. exit(firstPos);
  355. lastPos := -1;
  356. for i := firstPos + 1 to c - 1 do begin
  357. if (p^.WeigthKind <> TReorderWeigthKind.Identity) then begin
  358. lastPos := i;
  359. Break;
  360. end;
  361. Inc(p);
  362. end;
  363. if (lastPos = -1) then
  364. exit(c);
  365. exit(lastPos);
  366. end;
  367. if (APosition in [TReorderLogicalReset.FirstPrimaryIgnorable, TReorderLogicalReset.LastPrimaryIgnorable])
  368. then begin
  369. firstPos := -1;
  370. for i := 0 to c - 1 do begin
  371. if (p^.WeigthKind <= TReorderWeigthKind.Primary) then begin
  372. firstPos := i;
  373. Break;
  374. end;
  375. Inc(p);
  376. end;
  377. if (firstPos = -1) then
  378. exit(0);
  379. if (APosition = TReorderLogicalReset.FirstPrimaryIgnorable) then
  380. exit(firstPos);
  381. if (p^.WeigthKind < TReorderWeigthKind.Primary) then
  382. exit(firstPos);
  383. lastPos := -1;
  384. for i := firstPos + 1 to c - 1 do begin
  385. if (p^.WeigthKind <> TReorderWeigthKind.Identity) then begin
  386. lastPos := i;
  387. Break;
  388. end;
  389. Inc(p);
  390. end;
  391. if (lastPos = -1) then
  392. exit(c);
  393. exit(lastPos);
  394. end;
  395. if (APosition = TReorderLogicalReset.FirstNonIgnorable) then begin
  396. firstPos := -1;
  397. for i := 0 to c - 1 do begin
  398. if (p^.WeigthKind <= TReorderWeigthKind.Primary) then begin
  399. firstPos := i;
  400. Break;
  401. end;
  402. Inc(p);
  403. end;
  404. if (firstPos = -1) then
  405. exit(0);
  406. exit(firstPos);
  407. end;
  408. if (APosition = TReorderLogicalReset.LastNonIgnorable) then
  409. exit(c);
  410. end;
  411. procedure ApplyStatementToSequence(
  412. var ASequence : TOrderedCharacters;
  413. const AStatement : PReorderSequence;
  414. const AStatementCount : Integer
  415. );
  416. var
  417. pse, pd : PReorderUnit;
  418. kr : Integer;
  419. pst : PReorderSequence;
  420. function GetNextInsertPos() : Integer;
  421. var
  422. kk : Integer;
  423. begin
  424. if (pse^.WeigthKind = TReorderWeigthKind.Deletion) then
  425. exit(0);
  426. if (pse^.WeigthKind = TReorderWeigthKind.Identity) then
  427. exit(kr + 1);
  428. if not pst^.Before then begin
  429. kk := kr + 1;
  430. if (kk >= ASequence.ActualLength) then
  431. exit(kk);
  432. pd := @ASequence.Data[kk];
  433. for kk := kk to ASequence.ActualLength - 1 do begin
  434. if (pd^.WeigthKind <= pse^.WeigthKind) then
  435. exit(kk);
  436. Inc(pd);
  437. end;
  438. Result := ASequence.ActualLength;
  439. end else begin
  440. if (kr = 0) then
  441. exit(0);
  442. kk := kr;
  443. pd := @ASequence.Data[kk];
  444. if (pd^.WeigthKind = TReorderWeigthKind.Primary) then begin
  445. pd^.WeigthKind := pse^.WeigthKind;
  446. pse^.WeigthKind := TReorderWeigthKind.Primary;
  447. exit(kk);
  448. end;
  449. for kk := kk downto 0 do begin
  450. if (pd^.WeigthKind = TReorderWeigthKind.Deletion) or (pd^.WeigthKind <= pse^.WeigthKind) then begin
  451. if (pd^.WeigthKind > pse^.WeigthKind) then
  452. pd^.WeigthKind := pse^.WeigthKind;
  453. exit(kk);
  454. end;
  455. Dec(pd);
  456. end;
  457. Result := 0;
  458. end;
  459. end;
  460. var
  461. locResetPos, i, k, h : Integer;
  462. begin
  463. if (Length(AStatement^.Elements) = 0) then
  464. exit;
  465. pst := AStatement;
  466. for h := 0 to AStatementCount - 1 do begin
  467. locResetPos := -1;
  468. if (AStatement^.LogicalPosition > TReorderLogicalReset.None) then
  469. locResetPos := FindLogicalPos(@ASequence,AStatement^.LogicalPosition)
  470. else if (Length(pst^.Reset) > 0) then begin
  471. locResetPos := IndexOf(pst^.Reset,[],@ASequence.Data[0],ASequence.ActualLength);
  472. {if (locResetPos = -1) then
  473. raise ECldrException.CreateFmt('Character(s) not found in sequence : "%s".',[ToStr(pst^.Reset)]);}
  474. if (locResetPos = -1) then
  475. locResetPos := ASequence.ActualLength;
  476. end;
  477. pse := @pst^.Elements[0];
  478. kr := locResetPos;
  479. k := GetNextInsertPos();
  480. for i := Low(pst^.Elements) to High(pst^.Elements) do begin
  481. k := ASequence.Insert(pse^,k)+1;
  482. Inc(pse);
  483. end;
  484. Inc(pst);
  485. end;
  486. end;
  487. type
  488. PUCA_WeightRecArray = ^TUCA_WeightRecArray;
  489. TUCASortKey = array of Word;
  490. function SimpleFormKey(const ACEList : TUCA_WeightRecArray) : TUCASortKey;
  491. var
  492. r : TUCASortKey;
  493. i, c, k, ral, levelCount : Integer;
  494. pce : ^TUCA_WeightRec;
  495. begin
  496. c := Length(ACEList);
  497. if (c = 0) then
  498. exit(nil);
  499. //SetLength(r,((3+1{Level Separator})*c));
  500. levelCount := Length(ACEList[0].Weights);
  501. if (levelCount > 3) then
  502. levelCount := 3;
  503. SetLength(r,(levelCount*c + levelCount));
  504. ral := 0;
  505. for i := 0 to levelCount - 1 do begin
  506. for k := 0 to c - 1 do begin
  507. pce := @ACEList[k];
  508. if (pce^.Weights[i] <> 0) then begin
  509. r[ral] := pce^.Weights[i];
  510. ral := ral + 1;
  511. end;
  512. //pce := pce + 1;
  513. end;
  514. r[ral] := 0;
  515. ral := ral + 1;
  516. end;
  517. ral := ral - 1;
  518. SetLength(r,ral);
  519. Result := r;
  520. end;
  521. function CompareSortKey(const A, B : TUCASortKey) : Integer;
  522. var
  523. i, hb : Integer;
  524. begin
  525. if (Pointer(A) = Pointer(B)) then
  526. exit(0);
  527. Result := 1;
  528. hb := Length(B) - 1;
  529. for i := 0 to Length(A) - 1 do begin
  530. if (i > hb) then
  531. exit;
  532. if (A[i] < B[i]) then
  533. exit(-1);
  534. if (A[i] > B[i]) then
  535. exit(1);
  536. end;
  537. if (Length(A) = Length(B)) then
  538. exit(0);
  539. exit(-1);
  540. end;
  541. {function ComputeWeigths(
  542. const AData : PReorderUnit;
  543. const ADataLen : Integer;
  544. const ADataWeigths : TUCA_LineRecArray;
  545. out AResult : TUCA_LineRecArray
  546. ) : Integer;
  547. function GetWeigth(AItem : PReorderUnit) : PUCA_WeightRecArray;
  548. begin
  549. Result := nil;
  550. if (AItem^.InitialPosition < 1) or (AItem^.InitialPosition > Length(ADataWeigths)) then
  551. raise ECldrException.CreateFmt('Invalid "InitialPosition" value : %d.',[AItem^.InitialPosition]);
  552. Result := @ADataWeigths[(AItem^.InitialPosition-1)].Weights;
  553. end;
  554. var
  555. c, i, ral : Integer;
  556. p, q : PReorderUnit;
  557. r : TUCA_LineRecArray;
  558. pr : PUCA_LineRec;
  559. pbase : PReorderUnit;
  560. pw, pwb : PUCA_WeightRecArray;
  561. cw, ki : Integer;
  562. begin
  563. Result := 0;
  564. if (ADataLen < 1) then
  565. exit;
  566. c := ADataLen;
  567. ral := 0;
  568. SetLength(r,c);
  569. FillByte(r[0],(Length(r)*SizeOf(r[0])),0);
  570. q := nil;
  571. pbase := nil;
  572. p := AData+1;
  573. pr := @r[0];
  574. i := 1;
  575. while (i < c) do begin
  576. if p^.Changed then begin
  577. if (pbase = nil) then begin
  578. pbase := p - 1;
  579. pwb := GetWeigth(pbase);
  580. end;
  581. if (p^.WeigthKind = rwkIdentity) then begin
  582. pr^.CodePoints := Copy(p^.Characters);
  583. q := p - 1;
  584. if (q = pbase) then
  585. pw := pwb
  586. else
  587. pw := @((pr-1)^.Weights);
  588. pr^.Weights := Copy(pw^);
  589. Inc(pr);
  590. Inc(ral);
  591. end else begin
  592. pr^.CodePoints := Copy(p^.Characters);
  593. q := p - 1;
  594. if (q = pbase) then begin
  595. pw := pwb;
  596. cw := (Length(pw^)+1);
  597. SetLength(pr^.Weights,cw);
  598. Move(pw^[0],pr^.Weights[0],((cw-1)*SizeOf(pw^[0])));
  599. FillByte(pr^.Weights[(cw-1)],SizeOf(pr^.Weights[0]),0);
  600. ki := Ord(p^.WeigthKind);
  601. pr^.Weights[(cw-1)].Weights[ki] := pr^.Weights[(cw-2)].Weights[ki]+1;
  602. end else begin
  603. pw := @((pr-1)^.Weights);
  604. pr^.Weights := Copy(pw^);
  605. cw := Length(pr^.Weights);
  606. ki := Ord(p^.WeigthKind);
  607. for ki := Ord(rwkPrimary) to Ord(rwkTertiary) do begin
  608. if (ki < Ord(p^.WeigthKind)) then
  609. pr^.Weights[(cw-1)].Weights[ki] := pw^[(cw-1)].Weights[ki]
  610. else if (ki = Ord(p^.WeigthKind)) then begin
  611. if (pw^[(cw-1)].Weights[ki] = 0) then
  612. pr^.Weights[(cw-1)].Weights[ki] := pwb^[(Length(pwb^)-1)].Weights[ki]+1
  613. else
  614. pr^.Weights[(cw-1)].Weights[ki] := pw^[(cw-1)].Weights[ki]+1;
  615. end else begin
  616. pr^.Weights[(cw-1)].Weights[ki] := 0;
  617. end;
  618. end;
  619. end;
  620. Inc(pr);
  621. Inc(ral);
  622. end;
  623. end else begin
  624. pbase := nil;
  625. pwb := nil;
  626. end;
  627. Inc(p);
  628. Inc(i);
  629. end;
  630. SetLength(r,ral);
  631. AResult := r;
  632. Result := Length(AResult);
  633. end;}
  634. function IndexOf(
  635. const APattern : array of TUnicodeCodePoint;
  636. const AList : PUCA_LineRec;
  637. const AListLen : Integer
  638. ) : Integer;
  639. var
  640. i, lengthPattern, sizePattern : Integer;
  641. pl : PUCA_LineRec;
  642. begin
  643. Result := -1;
  644. if (Length(APattern) = 0) then
  645. exit;
  646. if (AListLen = 0) then
  647. exit;
  648. lengthPattern := Length(APattern);
  649. sizePattern := lengthPattern*SizeOf(TUnicodeCodePoint);
  650. pl := AList;
  651. for i := 0 to AListLen - 1 do begin
  652. if (Length(pl^.CodePoints) = lengthPattern) and
  653. CompareMem(@pl^.CodePoints[0],@APattern[0],sizePattern)
  654. then begin
  655. Result := i;
  656. Break;
  657. end;
  658. Inc(pl);
  659. end;
  660. end;
  661. function Compress(
  662. const AData : TUCA_LineRecArray;
  663. out AResult : TUCA_LineRecArray
  664. ) : Boolean;
  665. var
  666. r : TUCA_LineRecArray;
  667. pr, p : PUCA_LineRec;
  668. ral : Integer;
  669. function FindOutSlot() : Boolean;
  670. var
  671. k : Integer;
  672. begin
  673. k := IndexOf(p^.CodePoints,@r[0],ral);
  674. Result := (k >= 0);
  675. if (k = -1) then begin
  676. k := ral;
  677. ral := ral + 1;
  678. end;
  679. pr := @r[k];
  680. end;
  681. procedure AddContextData();
  682. var
  683. k : Integer;
  684. begin
  685. if not p^.HasContext() then
  686. exit;
  687. k := Length(pr^.Context.Data);
  688. SetLength(pr^.Context.Data,(k+1));
  689. pr^.Context.Data[k].CodePoints := Copy(p^.Context.Data[0].CodePoints);
  690. pr^.Context.Data[k].Weights := Copy(p^.Weights);
  691. end;
  692. procedure AddItem();
  693. begin
  694. pr^.Assign(p^);
  695. if p^.HasContext() then begin
  696. SetLength(pr^.Context.Data,0);
  697. pr^.Weights := nil;
  698. AddContextData();
  699. end;
  700. end;
  701. var
  702. c, i : Integer;
  703. begin
  704. c := Length(AData);
  705. if (c = 0) then
  706. exit;
  707. SetLength(r,c);
  708. FillByte(r[0],(Length(r)*SizeOf(r[0])),0);
  709. pr := @r[0];
  710. p := @AData[0];
  711. ral := 0;
  712. i := 0;
  713. AddItem();
  714. ral := 1;
  715. i := 1;
  716. Inc(p);
  717. while (i < c) do begin
  718. if FindOutSlot() then
  719. AddContextData()
  720. else
  721. AddItem();
  722. Inc(p);
  723. Inc(i);
  724. end;
  725. SetLength(r,ral);
  726. AResult := r;
  727. Result := (ral < Length(AData));
  728. end;
  729. function MarkSuffixAsChanged(
  730. const AData : PReorderUnit;
  731. const ADataLen : Integer
  732. ) : Integer;
  733. var
  734. i, k : Integer;
  735. p, q : PReorderUnit;
  736. suffixChar : TUnicodeCodePoint;
  737. begin
  738. Result := 0;
  739. if (ADataLen <= 1) then
  740. exit;
  741. q := AData;
  742. p := AData;
  743. for i := 0 to ADataLen - 1 do begin
  744. if p^.Changed then begin
  745. suffixChar := p^.Characters[0];
  746. for k := 0 to ADataLen - 1 do begin
  747. if not(q[k].Changed) and (q[k].Characters[0] = suffixChar) then begin
  748. q[k].Changed := True;
  749. Result := Result + 1;
  750. end;
  751. end;
  752. end;
  753. Inc(p);
  754. end;
  755. end;
  756. {$include weight_derivation.inc}
  757. function ComputeWeigths(
  758. const AData : PReorderUnit;
  759. const ADataLen : Integer;
  760. const ADataWeigths : TUCA_LineRecArray;
  761. out AResult : TUCA_LineRecArray
  762. ) : Integer;
  763. function GetWeigth(AItem : PReorderUnit) : PUCA_WeightRecArray;
  764. begin
  765. Result := nil;
  766. if (AItem^.InitialPosition < 1) or (AItem^.InitialPosition > Length(ADataWeigths)) then
  767. raise ECldrException.CreateFmt('Invalid "InitialPosition" value : %d.',[AItem^.InitialPosition]);
  768. Result := @ADataWeigths[(AItem^.InitialPosition-1)].Weights;
  769. end;
  770. var
  771. r : TUCA_LineRecArray;
  772. pr : PUCA_LineRec;
  773. procedure AddContext(const ACodePointPattern : TUnicodeCodePointArray);
  774. var
  775. k : Integer;
  776. begin
  777. k := Length(pr^.Context.Data);
  778. SetLength(pr^.Context.Data,(k+1));
  779. pr^.Context.Data[k].CodePoints := Copy(ACodePointPattern);
  780. SetLength(pr^.Context.Data[k].Weights,0);
  781. end;
  782. var
  783. ral : Integer;
  784. i : Integer;
  785. p : PReorderUnit;
  786. pbase : PReorderUnit;
  787. pwb : PUCA_WeightRecArray;
  788. actualBegin : Boolean;
  789. loopIndex : Integer;
  790. procedure SkipDeletion();
  791. begin
  792. pr^.CodePoints := Copy(p^.Characters);
  793. pr^.Deleted := True;
  794. SetLength(pr^.Weights,0);
  795. if p^.HasContext() then
  796. AddContext(p^.Context);
  797. Inc(pr);
  798. Inc(ral);
  799. Inc(p);
  800. Inc(i);
  801. end;
  802. procedure FindBaseItem();
  803. begin
  804. if (pbase = nil) or (pwb^ = nil) then begin
  805. if actualBegin then begin
  806. pwb := @ADataWeigths[0].Weights;
  807. end else begin
  808. pbase := p - 1;
  809. if pbase^.Changed then
  810. pwb := @((pr-1)^.Weights)
  811. else
  812. pwb := GetWeigth(pbase);
  813. if (pwb^ = nil) and (pbase = AData) then
  814. pwb := @ADataWeigths[0].Weights;
  815. end;
  816. end;
  817. end;
  818. function InternalComputeWeights(const AList : array of TUnicodeCodePointArray) : TUCA_WeightRecArray;
  819. var
  820. kral : Integer;
  821. kres : TUCA_WeightRecArray;
  822. procedure EnsureResultLength(const APlus : Integer);//inline;
  823. begin
  824. if ((kral+APlus) > Length(kres)) then
  825. SetLength(kres,(2*(kral+APlus)));
  826. end;
  827. procedure AddToResult(const AValue : TUCA_WeightRecArray);//inline;
  828. begin
  829. EnsureResultLength(Length(AValue));
  830. Move(AValue[0],kres[kral],(Length(AValue)*SizeOf(kres[0])));
  831. kral := kral + Length(AValue);
  832. end;
  833. var
  834. kc, k, ktempIndex, ki : Integer;
  835. tmpWeight : array of TUCA_PropWeights;
  836. begin
  837. kc := Length(AList);
  838. kral := 0;
  839. SetLength(kres,(10*kc));
  840. FillChar(kres[0],(Length(kres)*SizeOf(kres[0])),0);
  841. for k := 0 to kc - 1 do begin
  842. ktempIndex := IndexOf(AList[k],@r[0],ral);
  843. if (ktempIndex <> -1) then begin
  844. AddToResult(r[ktempIndex].Weights);
  845. Continue;
  846. end;
  847. ktempIndex := IndexOf(AList[k],[],AData,ADataLen);
  848. if (ktempIndex <> -1) then begin
  849. if not AData[ktempIndex].Changed then begin
  850. AddToResult(ADataWeigths[AData[ktempIndex].InitialPosition-1].Weights);
  851. Continue;
  852. end;
  853. end;
  854. if (Length(AList[k]) > 1) then begin
  855. for ki := 0 to Length(AList[k]) - 1 do begin
  856. ktempIndex := IndexOf([AList[k][ki]],@r[0],ral);
  857. if (ktempIndex <> -1) then begin
  858. AddToResult(r[ktempIndex].Weights);
  859. Continue;
  860. end;
  861. ktempIndex := IndexOf([AList[k][ki]],[],AData,ADataLen);
  862. if (ktempIndex <> -1) then begin
  863. if not AData[ktempIndex].Changed then begin
  864. AddToResult(ADataWeigths[AData[ktempIndex].InitialPosition-1].Weights);
  865. Continue;
  866. end;
  867. end;
  868. SetLength(tmpWeight,2);
  869. DeriveWeight(AList[k][ki],@tmpWeight[0]);
  870. EnsureResultLength(2);
  871. kres[kral].Weights[0] := tmpWeight[0].Weights[0];
  872. kres[kral].Weights[1] := tmpWeight[0].Weights[1];
  873. kres[kral].Weights[2] := tmpWeight[0].Weights[2];
  874. kres[kral+1].Weights[0] := tmpWeight[1].Weights[0];
  875. kres[kral+1].Weights[1] := tmpWeight[1].Weights[1];
  876. kres[kral+1].Weights[2] := tmpWeight[1].Weights[2];
  877. kral := kral + 2;
  878. tmpWeight := nil;
  879. end
  880. end;
  881. SetLength(tmpWeight,2);
  882. DeriveWeight(AList[k][0],@tmpWeight[0]);
  883. EnsureResultLength(2);
  884. kres[kral].Weights[0] := tmpWeight[0].Weights[0];
  885. kres[kral].Weights[1] := tmpWeight[0].Weights[1];
  886. kres[kral].Weights[2] := tmpWeight[0].Weights[2];
  887. kres[kral+1].Weights[0] := tmpWeight[1].Weights[0];
  888. kres[kral+1].Weights[1] := tmpWeight[1].Weights[1];
  889. kres[kral+1].Weights[2] := tmpWeight[1].Weights[2];
  890. kral := kral + 2;
  891. tmpWeight := nil;
  892. end;
  893. SetLength(kres,kral);
  894. Result := kres;
  895. end;
  896. procedure Handle_Expansion();
  897. var
  898. expChars : array[0..1] of TUnicodeCodePointArray;
  899. kres : TUCA_WeightRecArray;
  900. begin
  901. expChars[0] := (p-1)^.Characters;
  902. expChars[1] := p^.ExpansionChars;
  903. kres := InternalComputeWeights(expChars);
  904. if (p^.WeigthKind <= TReorderWeigthKind.Tertiary) then
  905. Inc(kres[Length(kres)-1].Weights[Ord(p^.WeigthKind)]);
  906. pr^.Weights := Copy(kres);
  907. end;
  908. var
  909. c, ti : Integer;
  910. q : PReorderUnit;
  911. pw : PUCA_WeightRecArray;
  912. begin
  913. Result := 0;
  914. if (ADataLen < 1) then
  915. exit;
  916. while True do begin
  917. for loopIndex := 0 to 1 do begin
  918. c := ADataLen;
  919. ral := 0;
  920. SetLength(r,c);
  921. FillByte(r[0],(Length(r)*SizeOf(r[0])),0);
  922. q := nil;
  923. pbase := nil;
  924. pr := @r[0];
  925. p := AData;
  926. i := 0;
  927. while (i < c) do begin
  928. if (p^.WeigthKind = TReorderWeigthKind.Deletion) then begin
  929. SkipDeletion();
  930. Continue;
  931. end;
  932. if p^.Changed then begin
  933. actualBegin := (i = 0) or (((p-1)^.WeigthKind = TReorderWeigthKind.Deletion));
  934. FindBaseItem();
  935. if p^.IsExpansion() then begin
  936. if (loopIndex = 0) then begin
  937. Inc(p);
  938. Inc(i);
  939. while (i < c) do begin
  940. if (p^.WeigthKind = TReorderWeigthKind.Primary) then
  941. Break;
  942. Inc(p);
  943. Inc(i);
  944. end;
  945. Continue;
  946. end;
  947. pr^.CodePoints := Copy(p^.Characters);
  948. Handle_Expansion();
  949. if p^.HasContext() then
  950. AddContext(p^.Context);
  951. Inc(pr);
  952. Inc(ral);
  953. end else if actualBegin then begin
  954. pr^.CodePoints := Copy(p^.Characters);
  955. pw := pwb;
  956. pr^.Weights := Copy(pw^);
  957. if p^.HasContext() then
  958. AddContext(p^.Context);
  959. Inc(pr);
  960. Inc(ral);
  961. end else if (p^.WeigthKind = TReorderWeigthKind.Identity) then begin
  962. pr^.CodePoints := Copy(p^.Characters);
  963. q := p - 1;
  964. if (q = pbase) then
  965. pw := pwb
  966. else
  967. pw := @((pr-1)^.Weights);
  968. pr^.Weights := Copy(pw^);
  969. if p^.HasContext() then
  970. AddContext(p^.Context);
  971. Inc(pr);
  972. Inc(ral);
  973. end else begin
  974. pr^.CodePoints := Copy(p^.Characters);
  975. if ((p - 1) = pbase) then begin
  976. if (p^.WeigthKind = TReorderWeigthKind.Primary) then begin
  977. SetLength(pr^.Weights,2);
  978. FillByte(pr^.Weights[0],(Length(pr^.Weights)*SizeOf(pr^.Weights[0])),0);
  979. pr^.Weights[0].Weights[0] := (pwb^[0].Weights[0] + 1);
  980. pr^.Weights[0].Variable := pwb^[0].Variable;
  981. pr^.Weights[1] := pr^.Weights[0];
  982. end else if (p^.WeigthKind = TReorderWeigthKind.Secondary) then begin
  983. SetLength(pr^.Weights,2);
  984. FillByte(pr^.Weights[0],(Length(pr^.Weights)*SizeOf(pr^.Weights[0])),0);
  985. pr^.Weights[0].Weights[0] := pwb^[0].Weights[0];
  986. pr^.Weights[0].Weights[1] := (pwb^[0].Weights[1] + 1);
  987. pr^.Weights[0].Variable := pwb^[0].Variable;
  988. pr^.Weights[1].Weights[0] := pr^.Weights[0].Weights[0];
  989. pr^.Weights[1].Variable := pr^.Weights[0].Variable;
  990. end else if (p^.WeigthKind = TReorderWeigthKind.Tertiary) then begin
  991. SetLength(pr^.Weights,2);
  992. FillByte(pr^.Weights[0],(Length(pr^.Weights)*SizeOf(pr^.Weights[0])),0);
  993. pr^.Weights[0].Weights[0] := pwb^[0].Weights[0];
  994. pr^.Weights[0].Weights[1] := pwb^[0].Weights[1];
  995. pr^.Weights[0].Weights[2] := (pwb^[0].Weights[2] + 1);
  996. pr^.Weights[0].Variable := pwb^[0].Variable;
  997. pr^.Weights[1].Weights[0] := pr^.Weights[0].Weights[0];
  998. pr^.Weights[1].Variable := pr^.Weights[0].Variable;
  999. end;
  1000. end else begin
  1001. pr^.Weights := Copy((pr-1)^.Weights);
  1002. if (p^.WeigthKind = TReorderWeigthKind.Primary) then
  1003. Inc(pr^.Weights[1].Weights[Ord(p^.WeigthKind)])
  1004. else
  1005. Inc(pr^.Weights[0].Weights[Ord(p^.WeigthKind)]);
  1006. end;
  1007. if p^.HasContext() then
  1008. AddContext(p^.Context);
  1009. Inc(pr);
  1010. Inc(ral);
  1011. end;
  1012. end else begin
  1013. if (i > 0) and ((p-1)^.WeigthKind <> TReorderWeigthKind.Deletion) and (p-1)^.Changed and
  1014. (ral > 0)
  1015. then begin
  1016. pw := GetWeigth(p);
  1017. ti := CompareSortKey(SimpleFormKey((pr-1)^.Weights),SimpleFormKey(pw^));
  1018. if ( (p^.WeigthKind = TReorderWeigthKind.Identity) and (ti > 0) ) or
  1019. ( (p^.WeigthKind >= TReorderWeigthKind.Primary) and (ti >= 0) )
  1020. then begin
  1021. p^.Changed := True;
  1022. Continue;
  1023. end;
  1024. end;
  1025. pbase := nil;
  1026. pwb := nil;
  1027. end;
  1028. Inc(p);
  1029. Inc(i);
  1030. end;
  1031. end;
  1032. SetLength(r,ral);
  1033. if (MarkSuffixAsChanged(AData,ADataLen) = 0) then
  1034. Break;
  1035. end;
  1036. Compress(r,AResult);
  1037. Result := Length(AResult);
  1038. end;
  1039. function FillInitialPositions(
  1040. AData : PReorderUnit;
  1041. const ADataLen : Integer;
  1042. const ADataWeigths : TUCA_LineRecArray
  1043. ) : Integer;
  1044. var
  1045. locNotFound, i, cw : Integer;
  1046. p : PReorderUnit;
  1047. pw : PUCA_LineRec;
  1048. begin
  1049. locNotFound := 0;
  1050. cw := Length(ADataWeigths);
  1051. if (cw > 0) then
  1052. pw := @ADataWeigths[0]
  1053. else
  1054. pw := nil;
  1055. p := AData;
  1056. for i := 0 to ADataLen - 1 do begin
  1057. p^.InitialPosition := IndexOf(p^.Characters,pw,cw) + 1;
  1058. if (p^.InitialPosition = 0) then
  1059. Inc(locNotFound);
  1060. Inc(p);
  1061. end;
  1062. Result := locNotFound;
  1063. end;
  1064. { TCldrCollationItem }
  1065. procedure TCldrCollationItem.Clear();
  1066. begin
  1067. FBackwards := False;
  1068. FBase := '';
  1069. FChangedFields := [];
  1070. SetLength(FRules,0);
  1071. FTypeName := '';
  1072. end;
  1073. { TCldrCollation }
  1074. function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;
  1075. begin
  1076. if (Index < 0) or (Index >= Length(FItems)) then
  1077. raise ERangeError.CreateFmt(SListIndexError,[Index]);
  1078. Result := FItems[Index];
  1079. end;
  1080. function TCldrCollation.GetItemCount: Integer;
  1081. begin
  1082. Result := Length(FItems);
  1083. end;
  1084. destructor TCldrCollation.Destroy;
  1085. begin
  1086. Clear();
  1087. inherited Destroy;
  1088. end;
  1089. procedure TCldrCollation.Clear();
  1090. var
  1091. i : Integer;
  1092. begin
  1093. for i := 0 to Length(FItems) - 1 do
  1094. FreeAndNil(FItems[i]);
  1095. SetLength(FItems,0);
  1096. FLocalID := '';
  1097. FDefaultType := '';
  1098. end;
  1099. function TCldrCollation.IndexOf(const AItemName: string): Integer;
  1100. var
  1101. i : Integer;
  1102. begin
  1103. for i := 0 to ItemCount - 1 do begin
  1104. if SameText(AItemName,Items[i].TypeName) then
  1105. exit(i);
  1106. end;
  1107. Result := -1;
  1108. end;
  1109. function TCldrCollation.Find(const AItemName: string): TCldrCollationItem;
  1110. var
  1111. i : Integer;
  1112. begin
  1113. i := IndexOf(AItemName);
  1114. if (i = - 1) then
  1115. Result := nil
  1116. else
  1117. Result := Items[i];
  1118. end;
  1119. function TCldrCollation.Add(AItem: TCldrCollationItem): Integer;
  1120. begin
  1121. Result := Length(FItems);
  1122. SetLength(FItems,(Result+1));
  1123. FItems[Result] := AItem;
  1124. AItem.FParent := Self;
  1125. end;
  1126. { TReorderSequence }
  1127. procedure TReorderSequence.Clear();
  1128. begin
  1129. Reset := nil;
  1130. Elements := nil;
  1131. LogicalPosition := TReorderLogicalReset(0);
  1132. Before := False;
  1133. end;
  1134. { TReorderUnit }
  1135. class function TReorderUnit.From(
  1136. const AChars,
  1137. AContext : array of TUnicodeCodePoint;
  1138. const AWeigthKind : TReorderWeigthKind;
  1139. const AInitialPosition : Integer
  1140. ) : TReorderUnit;
  1141. var
  1142. c : Integer;
  1143. begin
  1144. c := Length(AChars);
  1145. SetLength(Result.Characters,c);
  1146. if (c > 0) then
  1147. Move(AChars[0],Result.Characters[0],(c*SizeOf(Result.Characters[0])));
  1148. Result.WeigthKind := AWeigthKind;
  1149. Result.InitialPosition := AInitialPosition;
  1150. Result.Changed := False;
  1151. c := Length(AContext);
  1152. SetLength(Result.Context,c);
  1153. if (c > 0) then
  1154. Move(AContext[0],Result.Context[0],(c*SizeOf(Result.Context[0])));
  1155. end;
  1156. class function TReorderUnit.From(
  1157. const AChars : array of TUnicodeCodePoint;
  1158. const AWeigthKind : TReorderWeigthKind;
  1159. const AInitialPosition : Integer
  1160. ) : TReorderUnit;
  1161. begin
  1162. Result := From(AChars,[],AWeigthKind,AInitialPosition);
  1163. end;
  1164. class function TReorderUnit.From(
  1165. const AChar : TUnicodeCodePoint;
  1166. const AWeigthKind : TReorderWeigthKind;
  1167. const AInitialPosition : Integer
  1168. ) : TReorderUnit;
  1169. begin
  1170. Result := From([AChar],AWeigthKind,AInitialPosition);
  1171. end;
  1172. class function TReorderUnit.From(
  1173. const AChar : TUnicodeCodePoint;
  1174. const AContext : array of TUnicodeCodePoint;
  1175. const AWeigthKind : TReorderWeigthKind;
  1176. const AInitialPosition : Integer
  1177. ) : TReorderUnit;
  1178. begin
  1179. Result := From([AChar],AContext,AWeigthKind,AInitialPosition);
  1180. end;
  1181. procedure TReorderUnit.SetExpansion(const AChars: array of TUnicodeCodePoint);
  1182. var
  1183. c : Integer;
  1184. begin
  1185. c := Length(AChars);
  1186. SetLength(ExpansionChars,c);
  1187. if (c > 0) then
  1188. Move(AChars[0],ExpansionChars[0],(c*SizeOf(AChars[0])));
  1189. end;
  1190. procedure TReorderUnit.SetExpansion(const AChar: TUnicodeCodePoint);
  1191. begin
  1192. SetExpansion([AChar]);
  1193. end;
  1194. procedure TReorderUnit.Clear();
  1195. begin
  1196. Self.Characters := nil;
  1197. Self.Context := nil;
  1198. Self.ExpansionChars := nil;
  1199. Self.InitialPosition := 0;
  1200. Self.WeigthKind := TReorderWeigthKind(0);
  1201. Self.Changed := False;
  1202. end;
  1203. procedure TReorderUnit.Assign(const AItem : TReorderUnit);
  1204. begin
  1205. Clear();
  1206. Self.Characters := Copy(AItem.Characters);
  1207. //SetLength(Self.Context,Length(AItem.Context));
  1208. Self.Context := Copy(AItem.Context);
  1209. Self.ExpansionChars := Copy(AItem.ExpansionChars);
  1210. Self.WeigthKind := AItem.WeigthKind;
  1211. Self.InitialPosition := AItem.InitialPosition;
  1212. Self.Changed := AItem.Changed;
  1213. end;
  1214. function TReorderUnit.HasContext() : Boolean;
  1215. begin
  1216. Result := (Length(Context) > 0);
  1217. end;
  1218. function TReorderUnit.IsExpansion() : Boolean;
  1219. begin
  1220. Result := (Length(ExpansionChars) > 0);
  1221. end;
  1222. { TOrderedCharacters }
  1223. procedure TOrderedCharacters.EnsureSize(const AMinSize : Integer);
  1224. var
  1225. c : Integer;
  1226. begin
  1227. if (AMinSize > Length(Data)) then begin
  1228. if (AMinSize > 1000) then
  1229. c := AMinSize + 100
  1230. else
  1231. c := (3*AMinSize) div 2 ;
  1232. SetLength(Data,c);
  1233. end;
  1234. FActualLength := AMinSize;
  1235. end;
  1236. class function TOrderedCharacters.Create(const ACapacity : Integer) : TOrderedCharacters;
  1237. begin
  1238. if (ACapacity < 0) then
  1239. raise ERangeError.Create(SRangeError);
  1240. Result.FActualLength := 0;
  1241. SetLength(Result.Data,ACapacity);
  1242. end;
  1243. class function TOrderedCharacters.Create() : TOrderedCharacters;
  1244. begin
  1245. Result := Create(0);
  1246. end;
  1247. procedure TOrderedCharacters.Clear;
  1248. begin
  1249. Data := nil;
  1250. FActualLength := 0;
  1251. end;
  1252. function TOrderedCharacters.Clone() : TOrderedCharacters;
  1253. var
  1254. i : Integer;
  1255. begin
  1256. Result.Clear();
  1257. SetLength(Result.Data,Self.ActualLength);
  1258. for i := 0 to Length(Result.Data) - 1 do
  1259. Result.Data[i].Assign(Self.Data[i]);
  1260. Result.FActualLength := Self.FActualLength;
  1261. end;
  1262. function TOrderedCharacters.Insert(
  1263. const AItem : TReorderUnit;
  1264. const ADestPos : Integer
  1265. ) : Integer;
  1266. var
  1267. k, finalPos : Integer;
  1268. p : PReorderUnit;
  1269. i, c : Integer;
  1270. begin
  1271. if (ActualLength=0) then begin
  1272. EnsureSize(ActualLength + 1);
  1273. p := @Data[0];
  1274. p^.Assign(AItem);
  1275. p^.Changed := True;
  1276. exit(0);
  1277. end;
  1278. k := IndexOf(AItem.Characters,AItem.Context,@Data[0],ActualLength);
  1279. if (k = ADestPos) then begin
  1280. Data[ADestPos].Assign(AItem);
  1281. Data[ADestPos].Changed := True;
  1282. exit(k);
  1283. end;
  1284. finalPos := ADestPos;
  1285. if (finalPos > ActualLength) then
  1286. finalPos := ActualLength;
  1287. c := ActualLength;
  1288. EnsureSize(ActualLength + 1);
  1289. Data[c].Clear();
  1290. p := @Data[finalPos];
  1291. if (finalPos = ActualLength) then begin
  1292. p^.Assign(AItem);
  1293. p^.Changed := True;
  1294. end else begin
  1295. if (c > 0) then begin
  1296. p := @Data[c-1];
  1297. for i := finalPos to c - 1 do begin
  1298. Move(p^,(p+1)^,SizeOf(p^));
  1299. Dec(p);
  1300. end;
  1301. end;
  1302. p := @Data[finalPos];
  1303. {Move(
  1304. Pointer(p)^,Pointer(@p[1])^,
  1305. (ActualLength-(finalPos+1))*SizeOf(TReorderUnit)
  1306. );}
  1307. FillChar(Pointer(p)^,SizeOf(TReorderUnit),0);
  1308. p^.Assign(AItem);
  1309. p^.Changed := True;
  1310. end;
  1311. if (k >= 0) then begin
  1312. if (k > finalPos) then
  1313. Inc(k);
  1314. Delete(k);
  1315. end;
  1316. Result := finalPos;
  1317. end;
  1318. function TOrderedCharacters.Append(const AItem : TReorderUnit) : Integer;
  1319. begin
  1320. Result := Insert(AItem,ActualLength);
  1321. end;
  1322. procedure TOrderedCharacters.Delete(const AIndex : Integer);
  1323. var
  1324. i : Integer;
  1325. p : PReorderUnit;
  1326. begin
  1327. if (AIndex < 0) or (AIndex >= ActualLength) then
  1328. raise ERangeError.CreateFmt(SListIndexError,[AIndex]);
  1329. if (AIndex = (ActualLength-1)) then begin
  1330. Data[AIndex].Clear();
  1331. end else begin
  1332. //Data[AIndex].Clear();
  1333. p := @Data[AIndex];
  1334. p^.Clear();
  1335. for i := AIndex to ActualLength-2 do begin
  1336. Move((p+1)^,p^,SizeOf(p^));
  1337. Inc(p);
  1338. end;
  1339. {Move(
  1340. Pointer(@Data[(AIndex+1)])^,Pointer(@Data[AIndex])^,
  1341. (ActualLength-(AIndex+1))*SizeOf(TReorderUnit)
  1342. );}
  1343. FillChar(Pointer(@Data[(FActualLength-1)])^,SizeOf(TReorderUnit),0);
  1344. end;
  1345. FActualLength := FActualLength - 1;
  1346. end;
  1347. procedure TOrderedCharacters.ApplyStatement(const AStatement : PReorderSequence);
  1348. begin
  1349. ApplyStatementToSequence(Self,AStatement,1);
  1350. end;
  1351. function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
  1352. begin
  1353. if (ACollation.ItemCount = 0) then
  1354. exit('');
  1355. if (ACollation.IndexOf(ACollation.DefaultType) <> -1) then
  1356. exit(ACollation.DefaultType);
  1357. Result := 'standard';
  1358. if (ACollation.IndexOf(Result) <> -1) then
  1359. exit;
  1360. Result := 'search';
  1361. if (ACollation.IndexOf(Result) <> -1) then
  1362. exit;
  1363. if (ACollation.ItemCount > 0) then
  1364. Result := ACollation.Items[0].TypeName;
  1365. end;
  1366. procedure GenerateUCA_CLDR_Head(
  1367. ADest : TStream;
  1368. ABook : PUCA_DataBook;
  1369. AProps : PUCA_PropBook;
  1370. ACollation : TCldrCollationItem
  1371. );
  1372. procedure AddLine(const ALine : ansistring);
  1373. var
  1374. buffer : ansistring;
  1375. begin
  1376. buffer := ALine + sLineBreak;
  1377. ADest.Write(buffer[1],Length(buffer));
  1378. end;
  1379. procedure AddFields();
  1380. var
  1381. kc : Integer;
  1382. e : TCollationField;
  1383. ks : string;
  1384. ti : PTypeInfo;
  1385. begin
  1386. ti := TypeInfo(TCollationField);
  1387. ks := '';
  1388. kc := 0;
  1389. for e := Low(TCollationField) to High(TCollationField) do begin
  1390. if (e in ACollation.ChangedFields) then begin
  1391. ks := ks + ti^.Name + '.' +
  1392. GetEnumName(ti,Ord(e)) + ', ';
  1393. kc := kc + 1;
  1394. end
  1395. end;
  1396. if (AProps <> nil) then begin
  1397. if (AProps^.VariableLowLimit < High(Word)) then begin
  1398. ks := ks + ti^.Name + '.' +
  1399. GetEnumName(ti,Ord(TCollationField.VariableLowLimit)) + ', ';
  1400. kc := kc + 1;
  1401. end;
  1402. if (AProps^.VariableHighLimit > 0) then begin
  1403. ks := ks + ti^.Name + '.' +
  1404. GetEnumName(ti,Ord(TCollationField.VariableHighLimit)) + ', ';
  1405. kc := kc + 1;
  1406. end;
  1407. end;
  1408. if (kc > 0) then
  1409. ks := Copy(ks,1,(Length(ks)-2));
  1410. AddLine(' UPDATED_FIELDS = [ ' + ks + ' ];');
  1411. end;
  1412. begin
  1413. AddLine('{$mode objfpc}{$H+}');
  1414. AddLine('unit ' + COLLATION_FILE_PREFIX + LowerCase(ACollation.Parent.LocalID)+ ';'+sLineBreak);
  1415. AddLine('interface'+sLineBreak);
  1416. AddLine('implementation');
  1417. AddLine('uses');
  1418. AddLine(' unicodedata, unicodeducet;'+sLineBreak);
  1419. AddLine('const');
  1420. AddFields();
  1421. AddLine(' COLLATION_NAME = ' + QuotedStr(ACollation.Parent.Language) + ';');
  1422. AddLine(' BASE_COLLATION = ' + QuotedStr(ACollation.Base) + ';');
  1423. AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
  1424. if (AProps <> nil) then begin
  1425. AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
  1426. AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
  1427. AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
  1428. end else begin
  1429. AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(High(Word)) + ';');
  1430. AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(0) + ';');
  1431. AddLine(' VARIABLE_WEIGHT = ' + IntToStr(0) + ';');
  1432. end;
  1433. AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
  1434. AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
  1435. AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
  1436. AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
  1437. if (AProps <> nil) then
  1438. AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
  1439. AddLine('');
  1440. end;
  1441. procedure GenerateUCA_CLDR_Registration(
  1442. ADest : TStream;
  1443. ABook : PUCA_DataBook
  1444. );
  1445. procedure AddLine(const ALine : ansistring);
  1446. var
  1447. buffer : ansistring;
  1448. begin
  1449. buffer := ALine + sLineBreak;
  1450. ADest.Write(buffer[1],Length(buffer));
  1451. end;
  1452. begin
  1453. AddLine('var');
  1454. AddLine(' CLDR_Collation : TUCA_DataBook = (');
  1455. AddLine(' Base : nil;');
  1456. AddLine(' Version : VERSION_STRING;');
  1457. AddLine(' CollationName : COLLATION_NAME;');
  1458. AddLine(' VariableWeight : TUCA_VariableKind(VARIABLE_WEIGHT);');
  1459. AddLine(' Backwards : (BACKWARDS_0,BACKWARDS_1,BACKWARDS_2,BACKWARDS_3);');
  1460. if (Length(ABook^.Lines) > 0) then begin
  1461. AddLine(' BMP_Table1 : @UCA_TABLE_1[0];');
  1462. AddLine(' BMP_Table2 : @UCA_TABLE_2[0];');
  1463. AddLine(' OBMP_Table1 : @UCAO_TABLE_1[0];');
  1464. AddLine(' OBMP_Table2 : @UCAO_TABLE_2[0];');
  1465. AddLine(' PropCount : PROP_COUNT;');
  1466. AddLine(' Props : PUCA_PropItemRec(@UCA_PROPS[0]);');
  1467. end else begin
  1468. AddLine(' BMP_Table1 : nil;');
  1469. AddLine(' BMP_Table2 : nil;');
  1470. AddLine(' OBMP_Table1 : nil;');
  1471. AddLine(' OBMP_Table2 : nil;');
  1472. AddLine(' PropCount : 0;');
  1473. AddLine(' Props : nil;');
  1474. end;
  1475. AddLine(' VariableLowLimit : VARIABLE_LOW_LIMIT;');
  1476. AddLine(' VariableHighLimit : VARIABLE_HIGH_LIMIT;');
  1477. AddLine(' );');
  1478. AddLine('');
  1479. AddLine('procedure Register();');
  1480. AddLine('begin');
  1481. AddLine(' PrepareCollation(@CLDR_Collation,BASE_COLLATION,UPDATED_FIELDS);');
  1482. AddLine(' RegisterCollation(@CLDR_Collation);');
  1483. AddLine('end;');
  1484. AddLine('');
  1485. AddLine('initialization');
  1486. AddLine(' Register();');
  1487. AddLine('');
  1488. AddLine('finalization');
  1489. AddLine(' UnregisterCollation(COLLATION_NAME);');
  1490. AddLine('');
  1491. AddLine('end.');
  1492. end;
  1493. procedure CheckEndianTransform(const ASource : PUCA_PropBook);
  1494. var
  1495. x, y : array of Byte;
  1496. px, py : PUCA_PropItemRec;
  1497. begin
  1498. if (ASource = nil) or (ASource^.ItemSize = 0) then
  1499. exit;
  1500. SetLength(x,ASource^.ItemSize);
  1501. px := PUCA_PropItemRec(@x[0]);
  1502. ReverseFromNativeEndian(ASource^.Items,ASource^.ItemSize,px);
  1503. SetLength(y,ASource^.ItemSize);
  1504. py := PUCA_PropItemRec(@y[0]);
  1505. ReverseToNativeEndian(px,ASource^.ItemSize,py);
  1506. if not CompareMem(ASource^.Items,@y[0],Length(x)) then
  1507. CompareProps(ASource^.Items, PUCA_PropItemRec(@y[0]),ASource^.ItemSize);
  1508. end;
  1509. procedure GenerateCdlrCollation(
  1510. ACollation : TCldrCollation;
  1511. AItemName : string;
  1512. AStoreName : string;
  1513. AStream,
  1514. ANativeEndianStream,
  1515. AOtherEndianStream : TStream;
  1516. ARootChars : TOrderedCharacters;
  1517. ARootWeigths : TUCA_LineRecArray
  1518. );
  1519. procedure AddLine(const ALine : ansistring; ADestStream : TStream);
  1520. var
  1521. buffer : ansistring;
  1522. begin
  1523. buffer := ALine + sLineBreak;
  1524. ADestStream.Write(buffer[1],Length(buffer));
  1525. end;
  1526. var
  1527. locUcaBook : TUCA_DataBook;
  1528. locSequence : TOrderedCharacters;
  1529. locItem : TCldrCollationItem;
  1530. i : Integer;
  1531. locUcaProps : PUCA_PropBook;
  1532. ucaFirstTable : TucaBmpFirstTable;
  1533. ucaSecondTable : TucaBmpSecondTable;
  1534. ucaoFirstTable : TucaoBmpFirstTable;
  1535. ucaoSecondTable : TucaOBmpSecondTable;
  1536. locHasProps : Boolean;
  1537. s : string;
  1538. begin
  1539. locItem := ACollation.Find(AItemName);
  1540. if (locItem = nil) then
  1541. raise Exception.CreateFmt('Collation Item not found : "%s".',[AItemName]);
  1542. locSequence := ARootChars.Clone();
  1543. for i := 0 to Length(locItem.Rules) - 1 do
  1544. locSequence.ApplyStatement(@locItem.Rules[i]);
  1545. FillChar(locUcaBook,SizeOf(locUcaBook),0);
  1546. locUcaBook.Version := ACollation.Version;
  1547. locUcaBook.Backwards[1] := locItem.Backwards;
  1548. ComputeWeigths(@locSequence.Data[0],locSequence.ActualLength,ARootWeigths,locUcaBook.Lines);
  1549. for i := 0 to Length(locUcaBook.Lines) - 1 do
  1550. locUcaBook.Lines[i].Stored := True;
  1551. locHasProps := (Length(locUcaBook.Lines) > 0);
  1552. if not locHasProps then
  1553. locUcaProps := nil
  1554. else
  1555. MakeUCA_Props(@locUcaBook,locUcaProps);
  1556. try
  1557. CheckEndianTransform(locUcaProps);
  1558. if locHasProps then begin
  1559. MakeUCA_BmpTables(ucaFirstTable,ucaSecondTable,locUcaProps);
  1560. SetLength(ucaoSecondTable,100);
  1561. MakeUCA_OBmpTables(ucaoFirstTable,ucaoSecondTable,locUcaProps);
  1562. end;
  1563. GenerateLicenceText(AStream);
  1564. GenerateUCA_CLDR_Head(AStream,@locUcaBook,locUcaProps,locItem);
  1565. if locHasProps then begin
  1566. GenerateUCA_BmpTables(AStream,ANativeEndianStream,AOtherEndianStream,ucaFirstTable,ucaSecondTable);
  1567. GenerateUCA_OBmpTables(AStream,ANativeEndianStream,AOtherEndianStream,ucaoFirstTable,ucaoSecondTable);
  1568. GenerateUCA_PropTable(ANativeEndianStream,locUcaProps,ENDIAN_NATIVE);
  1569. GenerateUCA_PropTable(AOtherEndianStream,locUcaProps,ENDIAN_NON_NATIVE);
  1570. AddLine('{$ifdef FPC_LITTLE_ENDIAN}',AStream);
  1571. s := GenerateEndianIncludeFileName(AStoreName,ekLittle);
  1572. AddLine(Format(' {$include %s}',[ExtractFileName(s)]),AStream);
  1573. AddLine('{$else FPC_LITTLE_ENDIAN}',AStream);
  1574. s := GenerateEndianIncludeFileName(AStoreName,ekBig);
  1575. AddLine(Format(' {$include %s}',[ExtractFileName(s)]),AStream);
  1576. AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream);
  1577. end;
  1578. GenerateUCA_CLDR_Registration(AStream,@locUcaBook);
  1579. finally
  1580. locSequence.Clear();
  1581. FreeUcaBook(locUcaProps);
  1582. end;
  1583. end;
  1584. end.