cldrxml.pas 37 KB


  1. { Parser of the CLDR collation xml files.
  2. Copyright (c) 2013, 2014, 2015 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. { The procedures whose names end with 'XML' (ParseInitialDocumentXML,
  17. ParseCollationDocumentXML, ...) are for older CLDR versions (CLDR <= 23). Those
  18. old versions used an XML syntax for the collation rules specifications.
  19. The newer versions (and all versions going forward) use the text syntax.
  20. }
  21. unit cldrxml;
  22. {$mode delphi}{$H+}
  23. {$TypedAddress on}
  24. interface
  25. uses
  26. Classes, SysUtils, DOM,
  27. cldrhelper;
  28. type
  { TCldrCollationFileLoader }

  { ICldrCollationLoader implementation configured with a path (FPath).
    The method bodies live in the implementation section of this unit. }
  TCldrCollationFileLoader = class(TInterfacedObject,ICldrCollationLoader)
  private
    FPath : string;
    procedure SetPath(APath : string);
    function BuildFileName(ALanguage : string) : string;
    procedure CheckFile(AFileName : string);
  protected
    // ICldrCollationLoader methods
    procedure LoadCollation(const ALanguage : string; ACollation : TCldrCollation; AMode : TCldrParserMode);
    procedure LoadCollationType(const ALanguage, ATypeName : string; AType : TCldrCollationItem);
  public
    constructor Create(APath : string);
  end;
  { TCldrCollationStreamLoader }

  { ICldrCollationLoader implementation constructed from two parallel arrays:
    language names and streams. The method bodies live in the implementation
    section of this unit. }
  TCldrCollationStreamLoader = class(TInterfacedObject,ICldrCollationLoader)
  private
    FLanguages : array of string;
    FStreams : array of TStream;
    procedure CheckContent(ALanguage : string);
    function IndexOf(ALanguage : string) : Integer;
  protected
    // ICldrCollationLoader methods
    procedure LoadCollation(const ALanguage : string; ACollation : TCldrCollation; AMode : TCldrParserMode);
    procedure LoadCollationType(const ALanguage, ATypeName : string; AType : TCldrCollationItem);
  public
    constructor Create(const ALanguages : array of string; const AStreams : array of TStream);
    destructor Destroy();override;
  end;
  { Entry points for the XML rule syntax (see the unit header: CLDR <= 23). }

  // Initial (root) ordering, from a parsed document or from a file name.
  procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
  procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);overload;

  // Whole collation document, from a parsed document.
  procedure ParseCollationDocumentXML(ADoc : TDOMDocument; ACollation : TCldrCollation; AMode : TCldrParserMode);overload;
  // Single collation type, from a parsed document.
  procedure ParseCollationDocumentXML(ADoc : TDOMDocument; ACollation : TCldrCollationItem; AType : string);overload;
  // Whole collation document, from a file name.
  procedure ParseCollationDocumentXML(const AFileName : string; ACollation : TCldrCollation; AMode : TCldrParserMode);overload;
  // Single collation type, from a file name.
  procedure ParseCollationDocumentXML(const AFileName : string; ACollation : TCldrCollationItem; AType : string);overload;
  99. //-----------------------------------------------------
  { Entry points for the text rule syntax. Each target (whole collation or a
    single collation item) can be fed from a DOM document, a file name or a
    stream. }

  // Whole collation document
  procedure ParseCollationDocument2(ADoc : TDOMDocument; ACollation : TCldrCollation; AMode : TCldrParserMode);overload;
  procedure ParseCollationDocument2(const AFileName : string; ACollation : TCldrCollation; AMode : TCldrParserMode);overload;
  procedure ParseCollationDocument2(AStream : TStream; ACollation : TCldrCollation; AMode : TCldrParserMode);overload;

  // Single collation type
  procedure ParseCollationDocument2(const AFileName : string; ACollation : TCldrCollationItem; AType : string);overload;
  procedure ParseCollationDocument2(ADoc : TDOMDocument; ACollation : TCldrCollationItem; AType : string);overload;
  procedure ParseCollationDocument2(AStream : TStream; ACollation : TCldrCollationItem; AType : string);overload;
  130. implementation
  131. uses
  132. typinfo, RtlConsts, XMLRead, XPath, Helper, unicodeset, cldrtxt;
  const
    // XML element and attribute names used by the parsers of this unit.
    s_ALT        = 'alt';
    s_AT         = 'at';
    //s_BEFORE   = 'before';
    s_CODEPOINT  = 'codepoint';
    s_COLLATION  = 'collation';
    s_COLLATIONS = 'collations';
    s_CONTEXT    = 'context';
    //s_DEFAULT  = 'default';
    s_EXTEND     = 'extend';
    s_HEX        = 'hex';
    s_POSITION   = 'position';
    s_RESET      = 'reset';
    s_RULES      = 'rules';
    //s_STANDART = 'standard';
    s_TYPE       = 'type';
    s_CR         = 'cr';
  { Raises an Exception built from sNodeNameAssertMessage when the name of
    ANode differs from AExpectedName; does nothing otherwise. }
  procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
  var
    actualName : DOMString;
  begin
    actualName := ANode.NodeName;
    if (actualName = AExpectedName) then
      exit;
    raise Exception.CreateFmt(sNodeNameAssertMessage,[AExpectedName,actualName]);
  end;
  { Maps a one-letter tag to its weight kind: 'p' -> PriMary, 's' -> Secondary,
    't' -> Tertiary. 'i' and every other character map to Identity. }
  function CharToReorderWeigthKind(const AChar : Char) : TReorderWeigthKind;inline;
  begin
    if (AChar = 'p') then
      Result := TReorderWeigthKind.PriMary
    else if (AChar = 's') then
      Result := TReorderWeigthKind.Secondary
    else if (AChar = 't') then
      Result := TReorderWeigthKind.Tertiary
    else
      Result := TReorderWeigthKind.Identity;
  end;
  { Converts a DOM string to an array of code points.
    - empty string        : nil
    - one code unit       : that unit's ordinal value, taken as is
    - more than one unit  : converted through WideStringToUCS4String, with the
                            terminating #0 of the UCS4String dropped. }
  function DomString2UnicodeCodePointArray(const AValue : DOMString): TUnicodeCodePointArray;
  var
    ucs4 : UCS4String;
    idx, count : Integer;
  begin
    case Length(AValue) of
      0 : exit(nil);
      1 :
        begin
          SetLength(Result,1);
          Result[0] := Ord(AValue[1]);
        end;
      else
        begin
          ucs4 := WideStringToUCS4String(AValue);
          count := Length(ucs4) - 1; // the last item is the #0 terminator
          SetLength(Result,count);
          for idx := 0 to count - 1 do
            Result[idx] := ucs4[idx];
        end;
    end;
  end;
  { Parses one statement (a <reset> followed by its relation items) of the
    XML rule syntax, starting at child index AStartPosition of ARules.

    Parameters
      ARules         : element whose child nodes are the rule items.
      AStartPosition : index of the first child to look at (values <= 0 mean 0).
      AStatement     : receives the parsed statement; it is cleared first.
      ANextPos       : on success, index of the first child not consumed.

    Returns True when a statement has been read. Returns False when there is
    nothing left to read (no child at AStartPosition, or only comment nodes
    remain); ANextPos is then left untouched.

    Raises Exception for a malformed reset clause (sInvalidResetClause,
    sUnknownResetLogicalPosition, sNodeNameAssertMessage) and for items that
    are not understood (sCaseNothandled, sUniqueChildNodeExpected,
    sTextNodeChildExpected, sHexAttributeExpected). }
  function ParseStatementXML(
    ARules         : TDOMElement;
    AStartPosition : Integer;
    AStatement     : PReorderSequence;
    var ANextPos   : Integer
  ) : Boolean;
  var
    startPosition : Integer;
    statement : PReorderSequence;
    elementActualCount : Integer;
    list : TDOMNodeList;
    inBlock : Boolean;

    // Moves startPosition past any run of comment nodes.
    procedure SkipComments();
    begin
      while (startPosition < list.Count) do begin
        if (list[startPosition].NodeType <> COMMENT_NODE) then
          Break;
        Inc(startPosition);
      end;
    end;

    // Reads the <reset> clause at startPosition into statement^ and returns
    // the index of the first node following the clause.
    function parse_reset() : Integer;
    var
      n, t, attr : TDOMNode;
      s : string;
      logicalPos : TReorderLogicalReset;
    begin
      SkipComments();
      // The node list hands back nil past its end: fail with a proper
      // message instead of dereferencing it.
      if (startPosition >= list.Count) then
        raise Exception.Create(sInvalidResetClause);
      n := list[startPosition];
      CheckNodeName(n,s_RESET);
      if n.HasChildNodes() then begin
        n := n.FirstChild;
        if (n.NodeType = TEXT_NODE) then begin
          statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(n).Data));
        end else begin
          // The child element's name is the logical position.
          if not TryStrToLogicalReorder(n.NodeName,logicalPos) then
            raise Exception.CreateFmt(sUnknownResetLogicalPosition,[n.NodeName]);
          statement^.LogicalPosition := logicalPos;
        end;
        Result := startPosition+1;
      end else begin
        // Empty <reset/>: the anchor is carried by the next sibling.
        if ((startPosition+1) >= list.Count) then
          raise Exception.Create(sInvalidResetClause);
        t := list[startPosition+1];
        if (t.NodeName = s_POSITION) then begin
          attr := t.Attributes.GetNamedItem(s_AT);
          if (attr <> nil) then
            s := attr.NodeValue
          else
            s := ''; // reported below as an unknown logical position
          if not TryStrToLogicalReorder(s,logicalPos) then
            raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
          statement^.LogicalPosition := logicalPos;
        end else begin
          {if (t.NodeType <> TEXT_NODE) then
            raise Exception.CreateFmt(sTextNodeChildExpected,[(startPosition+1),(t.NodeName+'('+t.ClassName+')')]);}
          if (t.NodeType = TEXT_NODE) then
            statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(t).Data))
          else
            statement^.Reset := DomString2UnicodeCodePointArray(' ');
        end;
        Result := startPosition+2;
      end;
      if (statement^.LogicalPosition = TReorderLogicalReset.None) and
         (Length(statement^.Reset) = 0)
      then
        raise Exception.Create(sInvalidResetClause);
    end;

    // Grows statement^.Elements so that it holds at least ALength items.
    // The new capacity is ALength*10 below 10 items and ALength*2 otherwise.
    procedure EnsureElementLength(const ALength : Integer);
    var
      newLength : Integer;
    begin
      if (Length(statement^.Elements) >= ALength) then
        exit;
      if (ALength < 10) then
        newLength := ALength * 10
      else
        newLength := ALength * 2;
      SetLength(statement^.Elements,newLength);
    end;

    // Appends one element made of AChars, with the given weight kind and
    // (when not empty) context.
    procedure AddElement(
      const AChars      : array of UCS4Char;
      const AWeigthKind : TReorderWeigthKind;
      const AContext    : DOMString
    );overload;
    var
      kp : PReorderUnit;
      k : Integer;
    begin
      EnsureElementLength(elementActualCount+1);
      kp := @statement^.Elements[elementActualCount];
      SetLength(kp^.Characters,Length(AChars));
      for k := 0 to Length(AChars) - 1 do
        kp^.Characters[k] := AChars[k];
      kp^.WeigthKind := AWeigthKind;
      elementActualCount := elementActualCount + 1;
      if (AContext <> '') then
        kp^.Context := DomString2UnicodeCodePointArray(AContext);
    end;

    // Reads the characters carried by ANode into AChars (no trailing #0):
    //  - no child node           : a single space
    //  - <codepoint hex=".."/>   : the code point given by the hex attribute
    //  - a single text child     : the text converted to UCS4
    procedure ReadChars(
          ANode  : TDOMNode;
          APos   : Integer;
      var AChars : UCS4String
    );
    var
      t : TDOMNode;
      u4str : UCS4String;
      s : DOMString;
    begin
      if not ANode.HasChildNodes() then begin
        SetLength(AChars,1);
        AChars[0] := Ord(UnicodeChar(' '));
        exit;
        //raise Exception.CreateFmt(sCodePointExpected + ANode.ClassName,[APos]);
      end;
      t := ANode.FindNode(s_CODEPOINT);
      if (t = nil) then begin
        if (ANode.ChildNodes.Count <> 1) then
          raise Exception.CreateFmt(sUniqueChildNodeExpected,[APos]);
        t := ANode.ChildNodes[0];
        if not t.InheritsFrom(TDOMText) then
          raise Exception.CreateFmt(sTextNodeChildExpected,[APos,(t.NodeName+'('+t.ClassName+')')]);
        s := TDOMText(t).Data;
        if (Length(s) = 1) then begin
          SetLength(AChars,1);
          AChars[0] := Ord(s[1]);
        end else begin
          u4str := WideStringToUCS4String(s);
          AChars := u4str;
          SetLength(AChars,Length(AChars)-1); // drop the #0 terminator
        end;
      end else begin
        t := t.Attributes.GetNamedItem(s_HEX);
        if (t = nil) then
          raise Exception.CreateFmt(sHexAttributeExpected,[APos]);
        SetLength(AChars,1);
        AChars[0] := StrToInt('$'+t.NodeValue);
      end
    end;

    // Inserts APrefix in front of the current content of ADest.
    // NOTE: not called anywhere in this function at the moment.
    procedure AddPrefixChars(const APrefix : array of UCS4Char; var ADest : TUnicodeCodePointArray);
    var
      oldLength, prefixLength, k : Integer;
    begin
      prefixLength := Length(APrefix);
      if (prefixLength = 0) then
        exit;
      oldLength := Length(ADest);
      SetLength(ADest,(oldLength+prefixLength));
      // Shift the existing items right by the prefix length (Move handles
      // the overlapping ranges), then copy the prefix in front.
      if (oldLength > 0) then
        Move(ADest[0],ADest[prefixLength],(oldLength*SizeOf(ADest[0])));
      for k := 0 to prefixLength - 1 do
        ADest[k] := APrefix[k];
    end;

    // True when ANode's name is a single ASCII character, returned in ATag.
    function TryGetSimpleTag(ANode : TDOMNode; out ATag : AnsiChar) : Boolean;
    begin
      ATag := #0;
      Result := (Length(ANode.NodeName) = 1) and (Ord(ANode.NodeName[1])<=127);
      if Result then
        ATag := AnsiChar(ANode.NodeName[1]);
    end;

    // Reads the item at index APos of the rule list and returns the index of
    // the next item to read.
    function ReadNextItem(const APos : Integer) : Integer;
    var
      n, t : TDOMNode;
      contextStr : DOMString;
      w : TReorderWeigthKind;
      isSimpleCharTag : Boolean;
      simpleCharTag : AnsiChar;
      last : PReorderUnit;
      u4str : UCS4String;
      k : Integer;
    begin
      contextStr := '';
      Result := APos;
      n := list[APos];
      isSimpleCharTag := TryGetSimpleTag(n,simpleCharTag);
      if isSimpleCharTag and (simpleCharTag = 'x') then begin
        // <x> block: optional <context>, then the relation item, then an
        // optional <extend>.
        inBlock := True;
        n := n.FirstChild;
        if (n <> nil) and (n.NodeName = s_CONTEXT) then begin
          if n.HasChildNodes() then begin
            t := n.FirstChild;
            if (t.NodeType = TEXT_NODE) then
              contextStr := TDOMText(t).Data;
          end;
          n := n.NextSibling;
        end;
        // An <x> block without a relation item cannot be handled: report it
        // instead of dereferencing a nil node.
        if (n = nil) then
          raise Exception.CreateFmt(sCaseNothandled,[list[APos].NodeName,APos]);
        isSimpleCharTag := TryGetSimpleTag(n,simpleCharTag);
      end;
      if isSimpleCharTag and (simpleCharTag in ['p','s','t','i']) then begin
        w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
        ReadChars(n,APos,u4str);
        AddElement(u4str,w,contextStr);
        Result := Result + 1;
        if not inBlock then
          exit;
        last := @statement^.Elements[elementActualCount-1];
        n := n.NextSibling;
        if (n <> nil) and (n.NodeName = s_EXTEND) then begin
          ReadChars(n,APos,u4str);
          SetLength(last^.ExpansionChars,Length(u4str));
          for k := 0 to Length(u4str) - 1 do
            last^.ExpansionChars[k] := u4str[k];
        end;
        exit;
      end;
      // 'pc', 'sc', 'tc', 'ic': one element per character of the content.
      if (Length(n.NodeName) = 2) and (n.NodeName[2] = 'c') and
         (Ord(n.NodeName[1])<=127) and (AnsiChar(n.NodeName[1]) in ['p','s','t','i'])
      then begin
        w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
        ReadChars(n,APos,u4str);
        for k := Low(u4str) to High(u4str) do
          AddElement(u4str[k],w,contextStr);
        Result := Result + 1;
        exit;
      end;
      raise Exception.CreateFmt(sCaseNothandled,[n.NodeName,APos]);
    end;

  var
    i, c : Integer;
  begin
    Result := False;
    inBlock := False;
    elementActualCount := 0;
    if (AStartPosition <= 0) then
      startPosition := 0
    else
      startPosition := AStartPosition;
    list := ARules.ChildNodes;
    c := list.Count;
    if (c <= startPosition) then
      exit;
    statement := AStatement;
    statement^.Clear();
    // Only comments left: there is no further statement to read.
    SkipComments();
    if (startPosition >= c) then
      exit;
    i := parse_reset();
    while (i < c) do begin
      if (list[i].NodeName = s_RESET) then
        Break; // start of the next statement
      i := ReadNextItem(i);
    end;
    SetLength(statement^.Elements,elementActualCount);
    Result := (i > startPosition);
    if Result then
      ANextPos := i;
  end;
  { Builds ASequence from the 'rules' child of ADoc's document element: every
    statement found is parsed with ParseStatementXML and applied to the
    sequence in document order. Once done, the Changed flag of every item in
    use is set to False.

    Raises Exception(sRulesNodeNotFound) when there is no 'rules' node. An
    exception raised while applying a statement is re-raised with
    ' Position = <index>' appended to its message. }
  procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);
  var
    rulesNode : TDOMNode;
    rules : TDOMElement;
    position, childCount, followingPosition, k : Integer;
    current : TReorderSequence;
  begin
    rulesNode := ADoc.DocumentElement.FindNode(s_RULES);
    if (rulesNode = nil) then
      raise Exception.Create(sRulesNodeNotFound);
    rules := rulesNode as TDOMElement;
    childCount := rules.ChildNodes.Count;
    ASequence^.Clear();
    SetLength(ASequence^.Data,childCount+100);
    followingPosition := 0;
    position := 0;
    while (position < childCount) do begin
      current.Clear();
      if not ParseStatementXML(rules,position,@current,followingPosition) then
        Break;
      position := followingPosition;
      try
        ASequence^.ApplyStatement(@current);
      except
        on e : Exception do begin
          e.Message := Format('%s Position = %d',[e.Message,position]);
          raise;
        end;
      end;
    end;
    // Everything loaded here is the baseline: nothing counts as changed.
    for k := 0 to ASequence^.ActualLength - 1 do
      ASequence^.Data[k].Changed := False;
  end;
  { File based variant: reads AFileName as an XML document, hands it to the
    TDOMDocument overload and frees the document afterwards, even when the
    parsing raises. }
  procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);
  var
    xmlDoc : TXMLDocument;
  begin
    ReadXMLFile(xmlDoc,AFileName);
    try
      ParseInitialDocumentXML(ASequence,xmlDoc);
    finally
      xmlDoc.Free();
    end;
  end;
  { Evaluates the XPath expression AExpression against AContextNode and returns
    the result as text; returns an empty string when the evaluation yields no
    result object. }
  function EvaluateXPathStr(const AExpression : string; AContextNode : TDOMNode): DOMString;
  var
    value : TXPathVariable;
  begin
    Result := '';
    value := EvaluateXPathExpression(AExpression,AContextNode);
    if (value = nil) then
      exit;
    try
      Result := value.AsText;
    finally
      value.Free();
    end;
  end;
  { Expands the Unicode set pattern APattern into a sequence of deletion
    elements: ASequence is cleared and receives one element, of weight kind
    Deletion, per entry enumerated by the set's iterator.

    Returns the number of elements produced. An empty pattern returns 0 and
    leaves ASequence untouched; a pattern enumerating no entry returns 0 with
    ASequence cleared. }
  function ParseDeletion(
    const APattern  : DOMString;
          ASequence : PReorderSequence
  ) : Integer;
  var
    r : array of TReorderUnit;
    c, k : Integer;
    uset : TUnicodeSet;
    it : TUnicodeSet.TIterator;
  begin
    if (APattern = '') then
      exit(0);
    it := nil;
    uset := TUnicodeSet.Create();
    try
      uset.AddPattern(APattern);
      it := uset.CreateIterator();
      // First pass: count the entries so that the array is sized once.
      c := 0;
      it.Reset();
      while it.MoveNext() do
        Inc(c);
      SetLength(r,c);
      // Second pass: fill by index, so no element address is ever taken on
      // an empty array.
      k := 0;
      it.Reset();
      while it.MoveNext() do begin
        r[k].Clear();
        r[k].WeigthKind := TReorderWeigthKind.Deletion;
        r[k].Characters := Copy(it.GetCurrent());
        Inc(k);
      end;
      ASequence^.Clear();
      ASequence^.Elements := r;
    finally
      it.Free();
      uset.Free();
    end;
    Result := c;
  end;
  { Fills AItem from a <collation> element written with the XML rule syntax.

    TypeName, Base and Backwards are always read (Backwards also flags
    TCollationField.BackWards in ChangedFields) and Rules is reset to nil.
    With AMode = FullParsing the statements are parsed as well: first the
    optional 'suppress_contractions' pattern, then every statement of the
    'rules' child; the result is stored in AItem.Rules. }
  procedure ParseCollationItemXML(
    ACollationNode : TDOMElement;
    AItem          : TCldrCollationItem;
    AMode          : TCldrParserMode
  );
  var
    n : TDOMNode;
    rulesElement : TDOMElement;
    i, c, nextPos : Integer;
    statementList : TReorderSequenceArray;
    sal : Integer;//statement actual length
    statement : PReorderSequence;
    s : DOMString;
  begin
    AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
    AItem.Base := EvaluateXPathStr('base',ACollationNode);
    AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
    if AItem.Backwards then
      AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWards];
    AItem.Rules := nil;
    if (AMode = TCldrParserMode.FullParsing) then begin
      SetLength(statementList,15);
      sal := 0;
      // Invariant: statement always points at statementList[sal], the first
      // free slot.
      statement := @statementList[sal];
      s := EvaluateXPathStr('suppress_contractions',ACollationNode);
      if (s <> '') then begin
        if (ParseDeletion(s,statement) > 0) then begin
          Inc(sal);
          statement := @statementList[sal];
        end else begin
          statement^.Clear();
        end;
      end;
      n := ACollationNode.FindNode(s_RULES);
      if (n <> nil) then begin
        rulesElement := n as TDOMElement;
        c := rulesElement.ChildNodes.Count;
        nextPos := 0;
        i := 0;
        while (i < c) do begin
          statement^.Clear();
          if not ParseStatementXML(rulesElement,i,statement,nextPos) then
            Break;
          i := nextPos;
          Inc(sal);
          if (sal >= Length(statementList)) then
            SetLength(statementList,(sal*2));
          // Re-taken after every step: SetLength may move the array, and the
          // cursor must land on the free slot, not on the one just filled.
          statement := @statementList[sal];
        end;
      end;
      SetLength(statementList,sal);
      AItem.Rules := statementList;
    end;
  end;
  { Returns the part of ABuffer that starts at AStartPos (values below 1 are
    taken as 1) and runs up to, but not including, the next ASeparator or the
    end of the buffer.

    ANextStart receives the position following the separator that ended the
    part, or Length(ABuffer)+1 when the end of the buffer was reached. An empty
    buffer or a start position past the end gives an empty result with
    ANextStart = Length(ABuffer)+1. }
  function NextPart(
    const ABuffer    : string;
    const AStartPos  : Integer;
    const ASeparator : Char;
    out   ANextStart : Integer
  ) : string;
  var
    total, first, stopAt, k : Integer;
  begin
    total := Length(ABuffer);
    if (total < 1) or (AStartPos > total) then begin
      ANextStart := total+1;
      Result := '';
      exit;
    end;
    first := AStartPos;
    if (first < 1) then
      first := 1;
    stopAt := total+1; // no separator found: the part runs to the end
    for k := first to total do begin
      if (ABuffer[k] = ASeparator) then begin
        stopAt := k;
        Break;
      end;
    end;
    Result := Copy(ABuffer,first,(stopAt-first));
    if (stopAt <= total) then
      ANextStart := stopAt+1 // step over the separator
    else
      ANextStart := stopAt;
  end;
  { Handles an 'import' setting. The single value must have the form
    '<lang>' or '<lang>-u-co-<type>'; the (language, type) pair is added to
    AItem.Imports and the setting is marked as understood. A missing type
    becomes COLLATION_ITEM_DEFAULT and the language 'und' (any case) becomes
    'root'.

    Raises Exception(sInvalidImportStatement) when the setting does not carry
    exactly one value or when the value has another form. }
  procedure HandleSetting_Import(
    AItem    : TCldrCollationItem;
    ASetting : PSettingRec
  );
  var
    buffer, lang, col, s : UTF8String;
    i, ns : Integer;
  begin
    if (Length(ASetting^.Values) <> 1) then begin
      // List every value received in the error message. Both placeholders
      // need an argument: the text built so far and the current value.
      buffer := '';
      for i := 0 to Length(ASetting^.Values)-1 do
        buffer := Format('%s + "%s"',[buffer,ASetting^.Values[i]]);
      raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
    end;
    buffer := ASetting^.Values[0];
    lang := NextPart(buffer,1,'-',ns);
    i := ns;
    col := '';
    s := NextPart(buffer,i,'-',ns);
    if (s <> '') then begin
      if (s <> 'u') then
        raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
      i := ns;
      s := NextPart(buffer,i,'-',ns);
      if (s <> 'co') then
        raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
      // Everything after 'co-' is the collation type.
      s := Trim(Copy(buffer,ns,(Length(buffer)-ns+1)));
      if (s = '') then
        raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
      col := s;
    end;
    if (col = '') then
      col := COLLATION_ITEM_DEFAULT;
    if (LowerCase(lang) = 'und') then
      lang := 'root';
    AItem.Imports.Add(lang,col);
    ASetting^.Understood := True;
  end;
  { Handles a 'backwards' setting: the only accepted value is '2', which sets
    AItem.Backwards, flags TCollationField.BackWards in ChangedFields and marks
    the setting as understood.

    Raises Exception(sInvalidBackwardsStatement) when the setting does not
    carry exactly one value and Exception(sInvalidSettingValue) for any other
    value. }
  procedure HandleSetting_Backwards(
    AItem    : TCldrCollationItem;
    ASetting : PSettingRec
  );
  var
    buffer : UTF8String;
    i : Integer;
  begin
    if (Length(ASetting^.Values) <> 1) then begin
      // List every value received in the error message. Both placeholders
      // need an argument: the text built so far and the current value.
      buffer := '';
      for i := 0 to Length(ASetting^.Values)-1 do
        buffer := Format('%s + "%s"',[buffer,ASetting^.Values[i]]);
      raise Exception.CreateFmt(sInvalidBackwardsStatement,[buffer]);
    end;
    if (ASetting^.Values[0] = '2') then
      AItem.Backwards := True
    else
      raise Exception.CreateFmt(
              sInvalidSettingValue,
              [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
            );
    AItem.ChangedFields := AItem.ChangedFields+[TCollationField.BackWards];
    ASetting^.Understood := True;
  end;
  { Handles an 'alternate' setting. The value is compared case-insensitively:
    'non-ignorable' selects ucaNonIgnorable and 'shifted' selects ucaShifted
    for AItem.VariableWeight. TCollationField.Alternate is flagged in
    ChangedFields and the setting is marked as understood.

    Raises Exception(sInvalidAlternateStatement) when the setting does not
    carry exactly one value and Exception(sInvalidSettingValue) for any other
    value. }
  procedure HandleSetting_Alternate(
    AItem    : TCldrCollationItem;
    ASetting : PSettingRec
  );
  var
    buffer : UTF8String;
    i : Integer;
  begin
    if (Length(ASetting^.Values) <> 1) then begin
      // List every value received in the error message. Both placeholders
      // need an argument: the text built so far and the current value.
      buffer := '';
      for i := 0 to Length(ASetting^.Values)-1 do
        buffer := Format('%s + "%s"',[buffer,ASetting^.Values[i]]);
      raise Exception.CreateFmt(sInvalidAlternateStatement,[buffer]);
    end;
    buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
    if (buffer = 'non-ignorable') then
      AItem.VariableWeight := ucaNonIgnorable
    else if (buffer = 'shifted') then
      AItem.VariableWeight := ucaShifted
    else
      raise Exception.CreateFmt(
              sInvalidSettingValue,
              [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
            );
    AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Alternate];
    ASetting^.Understood := True;
  end;
  { Handles a 'normalization' setting. The value is compared
    case-insensitively: 'off' clears and 'on' sets AItem.Normalization.
    TCollationField.Normalization is flagged in ChangedFields and the setting
    is marked as understood.

    Raises Exception(sInvalidNormalizationStatement) when the setting does not
    carry exactly one value and Exception(sInvalidSettingValue) for any other
    value. }
  procedure HandleSetting_Normalization(
    AItem    : TCldrCollationItem;
    ASetting : PSettingRec
  );
  var
    buffer : UTF8String;
    i : Integer;
  begin
    if (Length(ASetting^.Values) <> 1) then begin
      // List every value received in the error message. Both placeholders
      // need an argument: the text built so far and the current value.
      buffer := '';
      for i := 0 to Length(ASetting^.Values)-1 do
        buffer := Format('%s + "%s"',[buffer,ASetting^.Values[i]]);
      raise Exception.CreateFmt(sInvalidNormalizationStatement,[buffer]);
    end;
    buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
    if (buffer = 'off') then
      AItem.Normalization := False
    else if (buffer = 'on') then
      AItem.Normalization := True
    else
      raise Exception.CreateFmt(
              sInvalidSettingValue,
              [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
            );
    AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Normalization];
    ASetting^.Understood := True;
  end;
  { Handles a 'strength' setting. Accepted values (compared
    case-insensitively): '1' Primary, '2' Secondary, '3' Tertiary,
    '4' Quaternary, 'i' Identity. The result goes to AItem.Strength,
    TCollationField.Strength is flagged in ChangedFields and the setting is
    marked as understood.

    Raises Exception(sInvalidStrengthStatement) when the setting does not carry
    exactly one value and Exception(sInvalidSettingValue) for any other
    value. }
  procedure HandleSetting_Strength(
    AItem    : TCldrCollationItem;
    ASetting : PSettingRec
  );
  var
    buffer : UTF8String;
    i : Integer;
  begin
    if (Length(ASetting^.Values) <> 1) then begin
      // List every value received in the error message. Both placeholders
      // need an argument: the text built so far and the current value.
      buffer := '';
      for i := 0 to Length(ASetting^.Values)-1 do
        buffer := Format('%s + "%s"',[buffer,ASetting^.Values[i]]);
      raise Exception.CreateFmt(sInvalidStrengthStatement,[buffer]);
    end;
    buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
    if (buffer = '1') then
      AItem.Strength := TComparisonStrength.Primary
    else if (buffer = '2') then
      AItem.Strength := TComparisonStrength.Secondary
    else if (buffer = '3') then
      AItem.Strength := TComparisonStrength.Tertiary
    else if (buffer = '4') then
      AItem.Strength := TComparisonStrength.Quaternary
    else if (buffer = 'i') then
      AItem.Strength := TComparisonStrength.Identity
    else
      raise Exception.CreateFmt(
              sInvalidSettingValue,
              [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
            );
    AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Strength];
    ASetting^.Understood := True;
  end;
  { Handler that does nothing: it fills the SETTING_HANDLERS slots of the
    options that have no dedicated handler. The setting is left as it is. }
  procedure HandleSetting_EMPTY_PROC(
    AItem    : TCldrCollationItem;
    ASetting : PSettingRec
  );
  begin
    // intentionally empty
  end;
  type
    // Signature shared by all the setting handlers.
    TSettingHandlerProc = procedure (
      AItem    : TCldrCollationItem;
      ASetting : PSettingRec
    );

  const
    // One handler per TSettingOption value; the trailing comment of each
    // line names the option the slot is meant for.
    SETTING_HANDLERS : array[TSettingOption] of TSettingHandlerProc =(
      HandleSetting_EMPTY_PROC,     // Unknown
      HandleSetting_Strength,       // Strength
      HandleSetting_Alternate,      // Alternate
      HandleSetting_Backwards,      // Backwards
      HandleSetting_Normalization,  // Normalization
      HandleSetting_EMPTY_PROC,     // CaseLevel
      HandleSetting_EMPTY_PROC,     // CaseFirst
      HandleSetting_EMPTY_PROC,     // HiraganaQ
      HandleSetting_EMPTY_PROC,     // NumericOrdering
      HandleSetting_EMPTY_PROC,     // Reorder
      HandleSetting_EMPTY_PROC,     // MaxVariable
      HandleSetting_Import,         // Import
      HandleSetting_EMPTY_PROC,     // SuppressContractions has a special handling see Process_SuppressContractions
      HandleSetting_EMPTY_PROC      // Optimize
    );
  { Runs, for every entry of AItem.Settings and in array order, the handler
    registered in SETTING_HANDLERS for the entry's option. }
  procedure HandleSettings(AItem : TCldrCollationItem);
  var
    remaining : Integer;
    cursor : PSettingRec;
  begin
    remaining := Length(AItem.Settings);
    if (remaining < 1) then
      exit;
    cursor := @AItem.Settings[0];
    while (remaining > 0) do begin
      SETTING_HANDLERS[cursor^.OptionValue](AItem,cursor);
      Inc(cursor);
      Dec(remaining);
    end;
  end;
  { Handles a 'suppressContractions' setting: its single value is a Unicode
    set pattern that ParseDeletion expands into AStatement.

    Returns True when at least one deletion element was produced; the
    setting's Understood flag receives the same value.

    Raises Exception(sInvalidSuppressContractionsStatement) when the setting
    does not carry exactly one value. }
  function Process_SuppressContractions(
    ASetting   : PSettingRec;
    AStatement : PReorderSequence
  ) : Boolean;
  var
    buffer : UTF8String;
    i : Integer;
  begin
    if (Length(ASetting^.Values) <> 1) then begin
      // List every value received in the error message. Both placeholders
      // need an argument: the text built so far and the current value.
      buffer := '';
      for i := 0 to Length(ASetting^.Values)-1 do
        buffer := Format('%s + "%s"',[buffer,ASetting^.Values[i]]);
      raise Exception.CreateFmt(sInvalidSuppressContractionsStatement,[buffer]);
    end;
    Result := (ParseDeletion(DOMString(ASetting^.Values[0]),AStatement) > 0);
    ASetting^.Understood := Result;
  end;
  { Fills AItem from a <collation> element written with the text rule syntax.

    TypeName, Alt and Mode are always set; Settings and Rules are reset to nil.
    With AMode = FullParsing the content of the 'cr' child is parsed statement
    by statement with ParseStatement:
      - reorder sequences are appended to the rules;
      - settings are collected in AItem.Settings; a suppressContractions
        setting additionally produces a rule through
        Process_SuppressContractions;
      - once everything is read, HandleSettings applies the settings.

    The first child of the 'cr' element is expected to be a CDATA section
    (any other node kind fails the 'as' cast); a 'cr' element without child
    is treated as having no rules. }
  procedure ParseCollationItem2(
    ACollationNode : TDOMElement;
    AItem          : TCldrCollationItem;
    AMode          : TCldrParserMode
  );
  var
    statementList : TReorderSequenceArray;
    sal : Integer;//statement actual length
    statement : PReorderSequence;

    // Commits the slot 'statement' points at and moves to the next free one,
    // growing the array when needed.
    procedure AddStatementToArray();
    begin
      Inc(sal);
      if (sal >= Length(statementList)) then
        SetLength(statementList,(sal*2));
      // Re-taken after every step: SetLength may move the array, and the
      // cursor must land on the free slot, not on the one just filled.
      statement := @statementList[sal];
    end;

  var
    n : TDOMNode;
    rulesElement : TDOMCDATASection;
    i, c, nextPos : Integer;
    parsedStatement : TParsedStatement;
    s : DOMString;
    u8 : UTF8String;
    buffer : PAnsiChar;
    lineCount : Integer;
    settingArray : TSettingRecArray;
  begin
    AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
    AItem.Alt := ACollationNode.GetAttribute(s_ALT);
    AItem.Settings := nil;
    AItem.Rules := nil;
    AItem.Mode := AMode;
    if (AMode = TCldrParserMode.FullParsing) then begin
      SetLength(statementList,15);
      sal := 0;
      statement := @statementList[0];
      n := ACollationNode.FindNode(s_CR);
      if (n <> nil) then begin
        n := (n as TDOMElement).FirstChild;
        if (n <> nil) then begin
          rulesElement := n as TDOMCDATASection;
          s := rulesElement.Data;
        end else begin
          s := ''; // empty <cr/>: nothing to parse
        end;
        u8 := UTF8Encode(s);
        c := Length(u8);
        buffer := PAnsiChar(u8); // valid even when u8 is empty
        nextPos := 0;
        i := 0;
        lineCount := 0;
        Clear(parsedStatement);
        settingArray := AItem.Settings;
        while (i < c) do begin
          statement^.Clear();
          if not ParseStatement(buffer,i,c,@parsedStatement,nextPos,lineCount) then
            Break;
          if (parsedStatement.Kind = TStatementKind.Sequence) then begin
            statement^.Assign(@parsedStatement.ReorderSequence);
            AddStatementToArray();
          end else if (parsedStatement.Kind = TStatementKind.Setting) then begin
            if (parsedStatement.Setting.OptionValue = TSettingOption.SuppressContractions) then begin
              if Process_SuppressContractions(@parsedStatement.Setting,statement) then
                AddStatementToArray()
              else
                statement^.Clear();
            end;
            AddItem(settingArray,@parsedStatement.Setting);
          end;
          i := nextPos;
        end;
        AItem.Settings := settingArray;
        if (Length(AItem.Settings) > 0) then
          HandleSettings(AItem);
      end;
      SetLength(statementList,sal);
      AItem.Rules := statementList;
    end;
  end;
  { Loads a whole collation document (XML rule syntax) into ACollation.

    ACollation is cleared, then receives the mode, the language, the version
    and the default type read from the document, and one item per 'collation'
    child of the 'collations' element, each parsed with ParseCollationItemXML
    in mode AMode.

    Raises Exception(sCollationsNodeNotFound) when the document has no
    'collations' element. An item under construction is freed when its parsing
    or its insertion raises. }
  procedure ParseCollationDocumentXML(
    ADoc       : TDOMDocument;
    ACollation : TCldrCollation;
    AMode      : TCldrParserMode
  );
  var
    node : TDOMNode;
    collationsNode : TDOMElement;
    k : Integer;
    pending : TCldrCollationItem;
    children : TDOMNodeList;
  begin
    node := ADoc.DocumentElement.FindNode(s_COLLATIONS);
    if (node = nil) then
      raise Exception.Create(sCollationsNodeNotFound);
    collationsNode := node as TDOMElement;
    ACollation.Clear();
    ACollation.Mode := AMode;
    ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
    ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
    ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement);
    if not collationsNode.HasChildNodes() then
      exit;
    children := collationsNode.ChildNodes;
    pending := nil;
    try
      for k := 0 to children.Count - 1 do begin
        node := children[k];
        if (node.NodeName <> s_COLLATION) then
          Continue;
        pending := TCldrCollationItem.Create();
        ParseCollationItemXML((node as TDOMElement),pending,AMode);
        ACollation.Add(pending);
        pending := nil; // handed over to ACollation
      end;
    except
      FreeAndNil(pending);
      raise;
    end;
  end;
  { Loads the collation of type AType from ADoc (XML rule syntax) into
    ACollation: the item is cleared, then fully parsed from the first
    'collations/collation' element whose 'type' attribute equals AType.

    Raises Exception(sCollationTypeNotFound) when no such element exists. }
  procedure ParseCollationDocumentXML(
    ADoc       : TDOMDocument;
    ACollation : TCldrCollationItem;
    AType      : string
  );
  var
    xv : TXPathVariable;
  begin
    xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
    try
      // A missing result object is handled like an empty node set, the same
      // way EvaluateXPathStr guards against nil.
      if (xv = nil) or (xv.AsNodeSet.Count = 0) then
        raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
      ACollation.Clear();
      ParseCollationItemXML((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
    finally
      xv.Free();
    end
  end;
  { Merges the content of a collation document (text rule syntax) into
    ACollation.

    ADoc       : parsed document; its root element must contain a child
                 node named s_COLLATIONS.
    ACollation : receiver. It is NOT cleared: its mode, language, version
                 and default type are overwritten, and items are merged by
                 type name.
    AMode      : parsing mode stored in ACollation and used for the items.

    For every s_COLLATION child node the header is parsed first to obtain
    the type name. If ACollation has no item with that name, a new item is
    parsed with AMode and added. If it already has one, that item is
    re-parsed only when its current Mode is lower than AMode.

    Raises Exception(sCollationsNodeNotFound) when the s_COLLATIONS node is
    missing. On any failure only an item created here and not yet added to
    ACollation is freed; items obtained through ACollation.Find are still
    referenced by the collation and are left alone. }
  procedure ParseCollationDocument2(
    ADoc       : TDOMDocument;
    ACollation : TCldrCollation;
    AMode      : TCldrParserMode
  );
  var
    n : TDOMNode;
    collationsElement : TDOMElement;
    i, c : Integer;
    item, newItem, tempItem : TCldrCollationItem;
    nl : TDOMNodeList;
  begin
    n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
    if (n = nil) then
      raise Exception.Create(sCollationsNodeNotFound);
    collationsElement := n as TDOMElement;
    ACollation.Mode := AMode;
    ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
    ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
    ACollation.DefaultType := EvaluateXPathStr('collations/defaultCollation',ADoc.DocumentElement);
    if not collationsElement.HasChildNodes() then
      Exit;

    nl := collationsElement.ChildNodes;
    c := nl.Count;
    // Scratch item, used only to read the type name of each node.
    tempItem := TCldrCollationItem.Create();
    try
      // newItem only ever holds an item created here and not yet added.
      newItem := nil;
      try
        for i := 0 to c - 1 do begin
          n := nl[i];
          if (n.NodeName <> s_COLLATION) then
            Continue;
          tempItem.Clear();
          ParseCollationItem2((n as TDOMElement),tempItem,TCldrParserMode.HeaderParsing);
          item := ACollation.Find(tempItem.TypeName);
          if (item = nil) then begin
            newItem := TCldrCollationItem.Create();
            ParseCollationItem2((n as TDOMElement),newItem,AMode);
            ACollation.Add(newItem);
            newItem := nil;
          end else if (item.Mode < AMode) then begin
            // Existing item: upgrade it in place. It must not be freed here
            // if parsing fails, ACollation still references it.
            ParseCollationItem2((n as TDOMElement),item,AMode);
          end;
        end;
      except
        FreeAndNil(newItem);
        raise;
      end;
    finally
      tempItem.Free();
    end;
  end;
  { Loads a single collation type, fully parsed, from a document that uses
    the text rule syntax.

    ADoc       : parsed collation document.
    ACollation : receiver item; cleared before being filled.
    AType      : value of the "type" attribute of the wanted collation node.

    Raises Exception(sCollationTypeNotFound) when no collation node carries
    the requested type. When several nodes match, the first one is used. }
  procedure ParseCollationDocument2(
    ADoc       : TDOMDocument;
    ACollation : TCldrCollationItem;
    AType      : string
  );
  var
    query : string;
    matches : TXPathVariable;
  begin
    query := Format('collations/collation[@type=%s]',[QuotedStr(AType)]);
    matches := EvaluateXPathExpression(query,ADoc.DocumentElement);
    try
      if (matches.AsNodeSet.Count = 0) then
        raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
      ACollation.Clear();
      ParseCollationItem2(
        (TDOMNode(matches.AsNodeSet[0]) as TDOMElement),
        ACollation,
        TCldrParserMode.FullParsing
      );
    finally
      matches.Free();
    end;
  end;
  { Parses the XML content of a stream into a new document, ignoring
    comments.

    f : stream to read from. It is not freed by this function.

    Returns the parsed document; the caller owns it and must free it.
    Any exception raised by the parser is propagated. In that case a
    partially built document handed back by the parser is freed here, so
    nothing leaks. }
  function ReadXMLFile(f: TStream) : TXMLDocument;overload;
  var
    src : TXMLInputSource;
    parser: TDOMParser;
  begin
    Result := nil;
    src := TXMLInputSource.Create(f);
    try
      // Nested so that src is released even if the parser cannot be created.
      parser := TDOMParser.Create();
      try
        parser.Options.IgnoreComments := True;
        try
          parser.Parse(src, Result);
        except
          // Parse may already have assigned a partial document to Result.
          FreeAndNil(Result);
          raise;
        end;
      finally
        parser.Free();
      end;
    finally
      src.Free();
    end;
  end;
  { Parses the XML file AFilename into a new document.

    The file is opened for reading with other writers denied, and closed
    again before returning, whether parsing succeeds or not.
    Returns the parsed document; the caller owns it. }
  function ReadXMLFile(const AFilename: String) : TXMLDocument;overload;
  var
    input : TStream;
  begin
    Result := nil;
    input := TFileStream.Create(AFilename, fmOpenRead or fmShareDenyWrite);
    try
      Result := ReadXMLFile(input);
    finally
      input.Free();
    end;
  end;
  { File based variant (legacy XML rule syntax): reads AFileName, fills
    ACollation from it using AMode, then sets ACollation.LocalID to the
    file name without directory and extension.
    The temporary document is always freed. }
  procedure ParseCollationDocumentXML(
    const AFileName  : string;
          ACollation : TCldrCollation;
          AMode      : TCldrParserMode
  );
  var
    xmlDoc : TXMLDocument;
    baseName : string;
  begin
    xmlDoc := ReadXMLFile(AFileName);
    try
      ParseCollationDocumentXML(xmlDoc,ACollation,AMode);
      baseName := ChangeFileExt(AFileName,'');
      ACollation.LocalID := ExtractFileName(baseName);
    finally
      xmlDoc.Free();
    end;
  end;
  { File based variant (legacy XML rule syntax): reads AFileName and loads
    the collation type AType from it into ACollation.
    The temporary document is always freed. }
  procedure ParseCollationDocumentXML(
    const AFileName  : string;
          ACollation : TCldrCollationItem;
          AType      : string
  );
  var
    xmlDoc : TXMLDocument;
  begin
    xmlDoc := ReadXMLFile(AFileName);
    try
      ParseCollationDocumentXML(xmlDoc,ACollation,AType);
    finally
      xmlDoc.Free();
    end;
  end;
  { File based variant (text rule syntax): reads AFileName, merges its
    content into ACollation using AMode, then sets ACollation.LocalID to
    the file name without directory and extension.
    The temporary document is always freed. }
  procedure ParseCollationDocument2(
    const AFileName  : string;
          ACollation : TCldrCollation;
          AMode      : TCldrParserMode
  );
  var
    xmlDoc : TXMLDocument;
    baseName : string;
  begin
    xmlDoc := ReadXMLFile(AFileName);
    try
      ParseCollationDocument2(xmlDoc,ACollation,AMode);
      baseName := ChangeFileExt(AFileName,'');
      ACollation.LocalID := ExtractFileName(baseName);
    finally
      xmlDoc.Free();
    end;
  end;
  { Stream based variant (text rule syntax): parses the XML held by AStream
    and merges its content into ACollation using AMode.
    Unlike the file based variant, LocalID is not touched.
    The temporary document is always freed; AStream is not. }
  procedure ParseCollationDocument2(
    AStream    : TStream;
    ACollation : TCldrCollation;
    AMode      : TCldrParserMode
  );
  var
    xmlDoc : TXMLDocument;
  begin
    xmlDoc := ReadXMLFile(AStream);
    try
      ParseCollationDocument2(xmlDoc,ACollation,AMode);
    finally
      xmlDoc.Free();
    end;
  end;
  { File based variant (text rule syntax): reads AFileName and loads the
    collation type AType from it into ACollation.
    The temporary document is always freed. }
  procedure ParseCollationDocument2(
    const AFileName  : string;
          ACollation : TCldrCollationItem;
          AType      : string
  );
  var
    xmlDoc : TXMLDocument;
  begin
    xmlDoc := ReadXMLFile(AFileName);
    try
      ParseCollationDocument2(xmlDoc,ACollation,AType);
    finally
      xmlDoc.Free();
    end;
  end;
  { Stream based variant (text rule syntax): parses the XML held by AStream
    and loads the collation type AType from it into ACollation.
    The temporary document is always freed; AStream is not. }
  procedure ParseCollationDocument2(
    AStream    : TStream;
    ACollation : TCldrCollationItem;
    AType      : string
  );
  var
    xmlDoc : TXMLDocument;
  begin
    xmlDoc := ReadXMLFile(AStream);
    try
      ParseCollationDocument2(xmlDoc,ACollation,AType);
    finally
      xmlDoc.Free();
    end;
  end;
  1165. { TCldrCollationStreamLoader }
  { Ensures that a stream has been registered for ALanguage.

    This loader serves content from the streams given to its constructor,
    so the check is made against the registered languages and not against
    the file system.

    Raises EFOpenError (message SFOpenError) when ALanguage is unknown. }
  procedure TCldrCollationStreamLoader.CheckContent(ALanguage: string);
  begin
    if (IndexOf(ALanguage) < 0) then
      raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
  end;
  { Returns the position of ALanguage in FLanguages, or -1 when it is not
    present. The comparison is an exact string match; when the language
    appears more than once the lowest index is returned. }
  function TCldrCollationStreamLoader.IndexOf(ALanguage: string): Integer;
  var
    k : Integer;
  begin
    Result := -1;
    for k := Low(FLanguages) to High(FLanguages) do
      if (FLanguages[k] = ALanguage) then begin
        Result := k;
        Break;
      end;
  end;
  { Loads the collation of ALanguage from its registered stream into
    ACollation, using the parsing mode AMode.

    The stream is rewound to position 0 before parsing, so the same stream
    can be used for several loads.

    Raises EFOpenError (message SFOpenError) when no stream is registered
    for ALanguage. }
  procedure TCldrCollationStreamLoader.LoadCollation(
    const ALanguage  : string;
          ACollation : TCldrCollation;
          AMode      : TCldrParserMode
  );
  var
    i : Integer;
    locStream : TStream;
  begin
    i := IndexOf(ALanguage);
    // Always fail on an unknown language: going on would index
    // FStreams with -1.
    if (i < 0) then
      raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
    locStream := FStreams[i];
    locStream.Position := 0;
    ParseCollationDocument2(locStream,ACollation,AMode);
  end;
  { Loads the collation type ATypeName of ALanguage from its registered
    stream into AType.

    The stream is rewound to position 0 before parsing, so the same stream
    can be used for several loads.

    Raises EFOpenError (message SFOpenError) when no stream is registered
    for ALanguage. }
  procedure TCldrCollationStreamLoader.LoadCollationType(
    const ALanguage,
          ATypeName : string;
          AType     : TCldrCollationItem
  );
  var
    i : Integer;
    locStream : TStream;
  begin
    i := IndexOf(ALanguage);
    // Always fail on an unknown language: going on would index
    // FStreams with -1.
    if (i < 0) then
      raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
    locStream := FStreams[i];
    locStream.Position := 0;
    ParseCollationDocument2(locStream,AType,ATypeName);
  end;
  { Creates a loader that serves ALanguages[i] from AStreams[i].

    When the two arrays differ in length only the common prefix (the
    shorter length) is registered; the extra entries are ignored.
    The registered streams are kept in FStreams, which the destructor
    frees. }
  constructor TCldrCollationStreamLoader.Create(
    const ALanguages : array of string;
    const AStreams   : array of TStream
  );
  var
    c, i : Integer;
  begin
    inherited Create();
    c := Length(ALanguages);
    if (Length(AStreams) < c) then
      c := Length(AStreams);
    SetLength(FLanguages,c);
    SetLength(FStreams,c);
    // Stay within the c allocated slots: looping up to High(ALanguages)
    // would overrun FLanguages, FStreams and AStreams when there are more
    // languages than streams.
    for i := 0 to c - 1 do begin
      FLanguages[i] := ALanguages[i];
      FStreams[i] := AStreams[i];
    end;
  end;
  { Frees every stream held in FStreams, then runs the inherited
    destructor. }
  destructor TCldrCollationStreamLoader.Destroy();
  var
    i : Integer;
  begin
    for i := Low(FStreams) to High(FStreams) do
      FreeAndNil(FStreams[i]);
    inherited Destroy();
  end;
  1239. { TCldrCollationFileLoader }
  { Stores the directory used to locate collation files.
    A non-empty path is stored with a trailing path delimiter; an empty
    path is stored as is. }
  procedure TCldrCollationFileLoader.SetPath(APath: string);
  var
    normalized : string;
  begin
    if (APath <> '') then
      normalized := IncludeTrailingPathDelimiter(APath)
    else
      normalized := '';
    FPath := normalized;
  end;
  { Returns the name of the collation file of ALanguage: the stored path,
    followed by the language name and the ".xml" extension. }
  function TCldrCollationFileLoader.BuildFileName(ALanguage: string): string;
  var
    fullName : string;
  begin
    fullName := Format('%s%s.xml',[FPath,ALanguage]);
    Result := fullName;
  end;
  { Raises EFOpenError (message SFOpenError) when the file AFileName does
    not exist; does nothing otherwise. }
  procedure TCldrCollationFileLoader.CheckFile(AFileName: string);
  begin
    if FileExists(AFileName) then
      Exit;
    raise EFOpenError.CreateFmt(SFOpenError,[AFileName]);
  end;
  { Loads the collation file of ALanguage into ACollation, using the
    parsing mode AMode.
    ACollation is not cleared here; the file content is handed to
    ParseCollationDocument2.
    Raises EFOpenError when the file does not exist. }
  procedure TCldrCollationFileLoader.LoadCollation(
    const ALanguage  : string;
          ACollation : TCldrCollation;
          AMode      : TCldrParserMode
  );
  var
    sourceFile : string;
  begin
    sourceFile := BuildFileName(ALanguage);
    CheckFile(sourceFile);
    ParseCollationDocument2(sourceFile,ACollation,AMode);
  end;
  { Loads the collation type ATypeName from the collation file of
    ALanguage into AType.
    AType is not cleared here; the work is handed to
    ParseCollationDocument2.
    Raises EFOpenError when the file does not exist. }
  procedure TCldrCollationFileLoader.LoadCollationType(
    const ALanguage,
          ATypeName : string;
          AType     : TCldrCollationItem
  );
  var
    sourceFile : string;
  begin
    sourceFile := BuildFileName(ALanguage);
    CheckFile(sourceFile);
    ParseCollationDocument2(sourceFile,AType,ATypeName);
  end;
  { Creates a file loader that looks for collation files in APath.
    The path is stored through SetPath. }
  constructor TCldrCollationFileLoader.Create(APath: string);
  begin
    Self.SetPath(APath);
  end;
  1290. end.