cldrxml.pas 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118
  1. { Parser of the CLDR collation xml files.
  2. Copyright (c) 2013, 2014, 2015 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. }
  16. { The procedure whoses names lasted by 'XML' (ParseInitialDocumentXML,
  17. ParseCollationDocumentXML, ...) are for older CLDR versions (CDLR <= 23); The
  18. old version was unsing a XML syntax for collation's rules specifications.
  19. The new versions (and going forward) will be using the text syntax.
  20. }
  21. unit cldrxml;
  22. {$mode objfpc}{$H+}
  23. {$TypedAddress on}
  24. interface
  25. uses
  26. Classes, SysUtils, DOM,
  27. cldrhelper;
  28. type
  29. { TCldrCollationFileLoader }
  30. TCldrCollationFileLoader = class(TInterfacedObject,ICldrCollationLoader)
  31. private
  32. FPath : string;
  33. private
  34. procedure SetPath(APath : string);
  35. function BuildFileName(ALanguage : string) : string;
  36. procedure CheckFile(AFileName : string);
  37. protected
  38. procedure LoadCollation(
  39. const ALanguage : string;
  40. ACollation : TCldrCollation;
  41. AMode : TCldrParserMode
  42. );
  43. procedure LoadCollationType(
  44. const ALanguage,
  45. ATypeName : string;
  46. AType : TCldrCollationItem
  47. );
  48. public
  49. constructor Create(APath : string);
  50. end;
  51. { TCldrCollationStreamLoader }
  52. TCldrCollationStreamLoader = class(TInterfacedObject,ICldrCollationLoader)
  53. private
  54. FLanguages : array of string;
  55. FStreams : array of TStream;
  56. private
  57. procedure CheckContent(ALanguage : string);
  58. function IndexOf(ALanguage : string) : Integer;
  59. protected
  60. procedure LoadCollation(
  61. const ALanguage : string;
  62. ACollation : TCldrCollation;
  63. AMode : TCldrParserMode
  64. );
  65. procedure LoadCollationType(
  66. const ALanguage,
  67. ATypeName : string;
  68. AType : TCldrCollationItem
  69. );
  70. public
  71. constructor Create(
  72. const ALanguages : array of string;
  73. const AStreams : array of TStream
  74. );
  75. destructor Destroy();override;
  76. end;
  77. procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
  78. procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);overload;
  79. procedure ParseCollationDocumentXML(
  80. ADoc : TDOMDocument;
  81. ACollation : TCldrCollation;
  82. AMode : TCldrParserMode
  83. );overload;
  84. procedure ParseCollationDocumentXML(
  85. ADoc : TDOMDocument;
  86. ACollation : TCldrCollationItem;
  87. AType : string
  88. );overload;
  89. procedure ParseCollationDocumentXML(
  90. const AFileName : string;
  91. ACollation : TCldrCollation;
  92. AMode : TCldrParserMode
  93. );overload;
  94. procedure ParseCollationDocumentXML(
  95. const AFileName : string;
  96. ACollation : TCldrCollationItem;
  97. AType : string
  98. );overload;
  99. //-----------------------------------------------------
  100. procedure ParseCollationDocument2(
  101. ADoc : TDOMDocument;
  102. ACollation : TCldrCollation;
  103. AMode : TCldrParserMode
  104. );overload;
  105. procedure ParseCollationDocument2(
  106. const AFileName : string;
  107. ACollation : TCldrCollation;
  108. AMode : TCldrParserMode
  109. );overload;
  110. procedure ParseCollationDocument2(
  111. AStream : TStream;
  112. ACollation : TCldrCollation;
  113. AMode : TCldrParserMode
  114. );overload;
  115. procedure ParseCollationDocument2(
  116. const AFileName : string;
  117. ACollation : TCldrCollationItem;
  118. AType : string
  119. );overload;
  120. procedure ParseCollationDocument2(
  121. ADoc : TDOMDocument;
  122. ACollation : TCldrCollationItem;
  123. AType : string
  124. );overload;
  125. procedure ParseCollationDocument2(
  126. AStream : TStream;
  127. ACollation : TCldrCollationItem;
  128. AType : string
  129. );overload;
  130. implementation
  131. uses
  132. typinfo, RtlConsts, XMLRead, XPath, Helper, unicodeset, cldrtxt;
  133. const
  134. s_ALT = 'alt';
  135. s_AT = 'at';
  136. //s_BEFORE = 'before';
  137. s_CODEPOINT = 'codepoint';
  138. s_COLLATION = 'collation';
  139. s_COLLATIONS = 'collations';
  140. s_CONTEXT = 'context';
  141. //s_DEFAULT = 'default';
  142. s_EXTEND = 'extend';
  143. s_HEX = 'hex';
  144. s_IMPORT = 'import';
  145. s_POSITION = 'position';
  146. s_RESET = 'reset';
  147. s_RULES = 'rules';
  148. s_SOURCE = 'source';
  149. //s_STANDART = 'standard';
  150. s_TYPE = 'type';
  151. s_CR = 'cr';
  152. procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
  153. begin
  154. if (ANode.NodeName <> AExpectedName) then
  155. raise Exception.CreateFmt(sNodeNameAssertMessage,[AExpectedName,ANode.NodeName]);
  156. end;
  157. function CharToReorderWeigthKind(const AChar : Char) : TReorderWeigthKind;inline;
  158. begin
  159. case AChar of
  160. 'p' : Result := TReorderWeigthKind.PriMary;
  161. 's' : Result := TReorderWeigthKind.Secondary;
  162. 't' : Result := TReorderWeigthKind.Tertiary;
  163. 'i' : Result := TReorderWeigthKind.Identity;
  164. else
  165. Result := TReorderWeigthKind.Identity;
  166. end;
  167. end;
  168. function DomString2UnicodeCodePointArray(const AValue : DOMString): TUnicodeCodePointArray;
  169. var
  170. u4str : UCS4String;
  171. k : Integer;
  172. begin
  173. if (Length(AValue) = 0) then
  174. exit(nil);
  175. if (Length(AValue) = 1) then begin
  176. SetLength(Result,1);
  177. Result[0] := Ord(AValue[1])
  178. end else begin
  179. u4str := WideStringToUCS4String(AValue);
  180. k := Length(u4str) - 1; // remove the last #0
  181. SetLength(Result,k);
  182. for k := 0 to k - 1 do
  183. Result[k] := u4str[k];
  184. end;
  185. end;
  186. function ParseStatementXML(
  187. ARules : TDOMElement;
  188. AStartPosition : Integer;
  189. AStatement : PReorderSequence;
  190. var ANextPos : Integer
  191. ) : Boolean;
  192. var
  193. startPosition : Integer;
  194. statement : PReorderSequence;
  195. elementActualCount : Integer;
  196. list : TDOMNodeList;
  197. inBlock : Boolean;
  198. procedure SkipComments();
  199. begin
  200. while (startPosition < list.Count) do begin
  201. if (list[startPosition].NodeType <> COMMENT_NODE) then
  202. Break;
  203. Inc(startPosition);
  204. end;
  205. end;
  206. function parse_reset() : Integer;
  207. var
  208. n, t : TDOMNode;
  209. s : string;
  210. logicalPos : TReorderLogicalReset;
  211. begin
  212. SkipComments();
  213. n := list[startPosition];
  214. CheckNodeName(n,s_RESET);
  215. if n.HasChildNodes() then begin
  216. n := n.FirstChild;
  217. if (n.NodeType = TEXT_NODE) then begin
  218. statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(n).Data));
  219. Result := startPosition+1;
  220. end else begin
  221. if not TryStrToLogicalReorder(n.NodeName,logicalPos) then
  222. raise Exception.CreateFmt(sUnknownResetLogicalPosition,[n.NodeName]);
  223. statement^.LogicalPosition := logicalPos;
  224. Result := startPosition+1;
  225. end;
  226. end else if not n.HasChildNodes() then begin
  227. if (list[startPosition+1].NodeName = s_POSITION) then begin
  228. s := list[startPosition+1].Attributes.GetNamedItem(s_AT).NodeValue;
  229. if not TryStrToLogicalReorder(s,logicalPos) then
  230. raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
  231. statement^.LogicalPosition := logicalPos;
  232. Result := startPosition+2;
  233. end else begin
  234. t := list[startPosition+1];
  235. {if (t.NodeType <> TEXT_NODE) then
  236. raise Exception.CreateFmt(sTextNodeChildExpected,[(startPosition+1),(t.NodeName+'('+t.ClassName+')')]);}
  237. if (t.NodeType = TEXT_NODE) then
  238. statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(t).Data))
  239. else
  240. statement^.Reset := DomString2UnicodeCodePointArray(' ');
  241. Result := startPosition+2;
  242. end;
  243. end;
  244. if (statement^.LogicalPosition = TReorderLogicalReset.None) and
  245. (Length(statement^.Reset) = 0)
  246. then
  247. raise Exception.Create(sInvalidResetClause);
  248. end;
  249. procedure EnsureElementLength(const ALength : Integer);
  250. var
  251. k, d : Integer;
  252. begin
  253. k := Length(statement^.Elements);
  254. if (k < ALength) then begin
  255. k := ALength;
  256. if (k = 0) then begin
  257. k := 50;
  258. end else begin
  259. if (k < 10) then
  260. d := 10
  261. else
  262. d := 2;
  263. k := k * d;
  264. end;
  265. SetLength(statement^.Elements,k);
  266. end;
  267. end;
  268. procedure AddElement(
  269. const AChars : array of UCS4Char;
  270. const AWeigthKind : TReorderWeigthKind;
  271. const AContext : DOMString
  272. );overload;
  273. var
  274. kp : PReorderUnit;
  275. k : Integer;
  276. begin
  277. EnsureElementLength(elementActualCount+1);
  278. kp := @statement^.Elements[elementActualCount];
  279. SetLength(kp^.Characters,Length(AChars));
  280. for k := 0 to Length(AChars) - 1 do
  281. kp^.Characters[k] := AChars[k];
  282. kp^.WeigthKind := AWeigthKind;
  283. elementActualCount := elementActualCount + 1;
  284. if (AContext <> '') then
  285. kp^.Context := DomString2UnicodeCodePointArray(AContext);
  286. end;
  287. procedure ReadChars(
  288. ANode : TDOMNode;
  289. APos : Integer;
  290. var AChars : UCS4String
  291. );
  292. var
  293. t : TDOMNode;
  294. u4str : UCS4String;
  295. s : DOMString;
  296. begin
  297. if not ANode.HasChildNodes() then begin
  298. SetLength(AChars,1);
  299. AChars[0] := Ord(UnicodeChar(' '));
  300. exit;
  301. //raise Exception.CreateFmt(sCodePointExpected + ANode.ClassName,[APos]);
  302. end;
  303. t := ANode.FindNode(s_CODEPOINT);
  304. if (t = nil) then begin
  305. if (ANode.ChildNodes.Count <> 1) then
  306. raise Exception.CreateFmt(sUniqueChildNodeExpected,[APos]);
  307. t := ANode.ChildNodes[0];
  308. if not t.InheritsFrom(TDOMText) then
  309. raise Exception.CreateFmt(sTextNodeChildExpected,[APos,(t.NodeName+'('+t.ClassName+')')]);
  310. s := TDOMText(t).Data;
  311. if (Length(s) = 1) then begin
  312. SetLength(AChars,1);
  313. AChars[0] := Ord(s[1]);
  314. end else begin
  315. u4str := WideStringToUCS4String(s);
  316. AChars := u4str;
  317. SetLength(AChars,Length(AChars)-1);
  318. end;
  319. end else begin
  320. t := t.Attributes.GetNamedItem(s_HEX);
  321. if (t = nil) then
  322. raise Exception.CreateFmt(sHexAttributeExpected,[APos]);
  323. SetLength(AChars,1);
  324. AChars[0] := StrToInt('$'+t.NodeValue);
  325. end
  326. end;
  327. procedure AddPrefixChars(const APrefix : array of UCS4Char; var ADest : TUnicodeCodePointArray);
  328. var
  329. k : Integer;
  330. begin
  331. k := Length(ADest);
  332. SetLength(ADest,(k+Length(APrefix)));
  333. Move(ADest[0],ADest[k+1],(SizeOf(k*ADest[0])));
  334. for k := 0 to k - 1 do
  335. ADest[k] := APrefix[k];
  336. end;
  337. function ReadNextItem(const APos : Integer) : Integer;
  338. var
  339. n, t : TDOMNode;
  340. contextStr : DOMString;
  341. w : TReorderWeigthKind;
  342. isSimpleCharTag : Boolean;
  343. simpleCharTag : AnsiChar;
  344. last : PReorderUnit;
  345. u4str : UCS4String;
  346. k : Integer;
  347. begin
  348. contextStr := '';
  349. Result := APos;
  350. n := list[APos];
  351. isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
  352. if isSimpleCharTag then begin
  353. simpleCharTag := AnsiChar(n.NodeName[1]);
  354. if (simpleCharTag = 'x') then begin
  355. inBlock := True;
  356. n := n.FirstChild;
  357. if (n.NodeName = s_CONTEXT) then begin
  358. if n.HasChildNodes() then begin
  359. t := n.FirstChild;
  360. if (t.NodeType = TEXT_NODE) then
  361. contextStr := TDOMText(t).Data;
  362. end;
  363. n := n.NextSibling;
  364. end;
  365. isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
  366. if isSimpleCharTag then
  367. simpleCharTag := AnsiChar(n.NodeName[1]);
  368. end;
  369. end;
  370. if isSimpleCharTag and (simpleCharTag in ['p','s','t','i']) then begin
  371. w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
  372. ReadChars(n,APos,u4str);
  373. AddElement(u4str,w,contextStr);
  374. Result := Result + 1;
  375. if not inBlock then
  376. exit;
  377. last := @statement^.Elements[elementActualCount-1];
  378. n := n.NextSibling;
  379. if (n <> nil) and (n.NodeName = s_EXTEND) then begin
  380. ReadChars(n,APos,u4str);
  381. SetLength(last^.ExpansionChars,Length(u4str));
  382. for k := 0 to Length(u4str) - 1 do
  383. last^.ExpansionChars[k] := u4str[k];
  384. end;
  385. exit;
  386. end;
  387. if (Length(n.NodeName) = 2) and (n.NodeName[2] = 'c') and
  388. (Ord(n.NodeName[1])<=127) and (AnsiChar(n.NodeName[1]) in ['p','s','t','i'])
  389. then begin
  390. w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
  391. ReadChars(n,APos,u4str);
  392. for k := Low(u4str) to High(u4str) do
  393. AddElement(u4str[k],w,contextStr);
  394. Result := Result + 1;
  395. exit;
  396. end;
  397. raise Exception.CreateFmt(sCaseNothandled,[n.NodeName,APos]);
  398. end;
  399. var
  400. i, c : Integer;
  401. n : TDOMNode;
  402. begin
  403. Result := False;
  404. inBlock := False;
  405. elementActualCount := 0;
  406. if (AStartPosition <= 0) then
  407. startPosition := 0
  408. else
  409. startPosition := AStartPosition;
  410. i := startPosition;
  411. list := ARules.ChildNodes;
  412. c := list.Count;
  413. if (c <= i) then
  414. exit;
  415. statement := AStatement;
  416. statement^.Clear();
  417. n := list[i];
  418. i := parse_reset();
  419. while (i < c) do begin
  420. n := list[i];
  421. if (n.NodeName = s_RESET) then
  422. Break;
  423. i := ReadNextItem(i);
  424. end;
  425. SetLength(statement^.Elements,elementActualCount);
  426. Result := (i > startPosition);
  427. if Result then
  428. ANextPos := i;
  429. end;
  430. procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);
  431. var
  432. n : TDOMNode;
  433. rulesElement : TDOMElement;
  434. i, c, nextPost : Integer;
  435. statement : TReorderSequence;
  436. p : PReorderUnit;
  437. begin
  438. n := ADoc.DocumentElement.FindNode(s_RULES);
  439. if (n = nil) then
  440. raise Exception.Create(sRulesNodeNotFound);
  441. rulesElement := n as TDOMElement;
  442. c := rulesElement.ChildNodes.Count;
  443. ASequence^.Clear();
  444. SetLength(ASequence^.Data,c+100);
  445. nextPost := 0;
  446. i := 0;
  447. while (i < c) do begin
  448. statement.Clear();
  449. if not ParseStatementXML(rulesElement,i,@statement,nextPost) then
  450. Break;
  451. i := nextPost;
  452. try
  453. ASequence^.ApplyStatement(@statement);
  454. except
  455. on e : Exception do begin
  456. e.Message := Format('%s Position = %d',[e.Message,i]);
  457. raise;
  458. end;
  459. end;
  460. end;
  461. if (ASequence^.ActualLength > 0) then begin
  462. p := @ASequence^.Data[0];
  463. for i := 0 to ASequence^.ActualLength - 1 do begin
  464. p^.Changed := False;
  465. Inc(p);
  466. end;
  467. end;
  468. end;
  469. procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);
  470. var
  471. doc : TXMLDocument;
  472. begin
  473. ReadXMLFile(doc,AFileName);
  474. try
  475. ParseInitialDocumentXML(ASequence,doc);
  476. finally
  477. doc.Free();
  478. end;
  479. end;
  480. function EvaluateXPathStr(const AExpression : string; AContextNode : TDOMNode): DOMString;
  481. var
  482. xv : TXPathVariable;
  483. begin
  484. xv := EvaluateXPathExpression(AExpression,AContextNode);
  485. try
  486. if (xv <> nil) then
  487. Result := xv.AsText
  488. else
  489. Result := '';
  490. finally
  491. xv.Free();
  492. end;
  493. end;
  494. function ParseDeletion(
  495. const APattern : DOMString;
  496. ASequence : PReorderSequence
  497. ) : Integer;
  498. var
  499. r : array of TReorderUnit;
  500. c, i : Integer;
  501. uset : TUnicodeSet;
  502. it : TUnicodeSet.TIterator;
  503. p : PReorderUnit;
  504. begin
  505. if (APattern = '') then
  506. exit(0);
  507. it := nil;
  508. uset := TUnicodeSet.Create();
  509. try
  510. uset.AddPattern(APattern);
  511. it := uset.CreateIterator();
  512. c := 0;
  513. it.Reset();
  514. while it.MoveNext() do begin
  515. Inc(c);
  516. end;
  517. SetLength(r,c);
  518. p := @r[0];
  519. i := 0;
  520. it.Reset();
  521. while it.MoveNext() do begin
  522. p^.Clear();
  523. p^.WeigthKind := TReorderWeigthKind.Deletion;
  524. p^.Characters := Copy(it.GetCurrent());
  525. Inc(p);
  526. Inc(i);
  527. end;
  528. ASequence^.Clear();
  529. ASequence^.Elements := r;
  530. finally
  531. it.Free();
  532. uset.Free();
  533. end;
  534. r := nil;
  535. end;
  536. procedure ParseCollationItemXML(
  537. ACollationNode : TDOMElement;
  538. AItem : TCldrCollationItem;
  539. AMode : TCldrParserMode
  540. );
  541. var
  542. n : TDOMNode;
  543. rulesElement : TDOMElement;
  544. i, c, nextPos : Integer;
  545. statementList : TReorderSequenceArray;
  546. sal : Integer;//statement actual length
  547. statement : PReorderSequence;
  548. s : DOMString;
  549. begin
  550. AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
  551. AItem.Base := EvaluateXPathStr('base',ACollationNode);
  552. AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
  553. if AItem.Backwards then
  554. AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
  555. AItem.Rules := nil;
  556. if (AMode = TCldrParserMode.FullParsing) then begin
  557. SetLength(statementList,15);
  558. sal := 0;
  559. statement := @statementList[0];
  560. s := EvaluateXPathStr('suppress_contractions',ACollationNode);
  561. if (s <> '') then begin
  562. if (ParseDeletion(s,statement) > 0) then begin
  563. Inc(sal);
  564. Inc(statement);
  565. end else begin
  566. statement^.Clear();
  567. end;
  568. end;
  569. n := ACollationNode.FindNode(s_RULES);
  570. if (n <> nil) then begin
  571. rulesElement := n as TDOMElement;
  572. c := rulesElement.ChildNodes.Count;
  573. nextPos := 0;
  574. i := 0;
  575. while (i < c) do begin
  576. statement^.Clear();
  577. if not ParseStatementXML(rulesElement,i,statement,nextPos) then
  578. Break;
  579. i := nextPos;
  580. Inc(statement);
  581. Inc(sal);
  582. if (sal >= Length(statementList)) then begin
  583. SetLength(statementList,(sal*2));
  584. statement := @statementList[(sal-1)];
  585. end;
  586. end;
  587. end;
  588. SetLength(statementList,sal);
  589. AItem.Rules := statementList;
  590. end;
  591. end;
  592. procedure ParseImports(ACollationNode : TDOMElement; AItem : TCldrCollationItem);
  593. var
  594. locList : TXPathVariable;
  595. i : Integer;
  596. nd, locAtt : TDOMNode;
  597. locSource, locType : string;
  598. begin
  599. locList := EvaluateXPathExpression(s_IMPORT,ACollationNode);
  600. try
  601. if not locList.InheritsFrom(TXPathNodeSetVariable) then
  602. exit;
  603. for i := 0 to locList.AsNodeSet.Count-1 do begin
  604. nd := TDOMNode(locList.AsNodeSet[i]);
  605. if (nd.Attributes <> nil) then begin
  606. locSource := '';
  607. locType := '';
  608. locAtt := nd.Attributes.GetNamedItem(s_SOURCE);
  609. if (locAtt <> nil) then
  610. locSource := locAtt.NodeValue;
  611. locAtt := nd.Attributes.GetNamedItem(s_TYPE);
  612. if (locAtt <> nil) then
  613. locType := locAtt.NodeValue;
  614. end;
  615. if (locType <> '') then
  616. AItem.Imports.Add(locSource,locType);
  617. end;
  618. finally
  619. locList.Free();
  620. end;
  621. end;
  622. procedure ParseCollationItem2(
  623. ACollationNode : TDOMElement;
  624. AItem : TCldrCollationItem;
  625. AMode : TCldrParserMode
  626. );
  627. var
  628. n : TDOMNode;
  629. rulesElement : TDOMCDATASection;
  630. i, c, nextPos : Integer;
  631. statementList : TReorderSequenceArray;
  632. sal : Integer;//statement actual length
  633. statement : PReorderSequence;
  634. s : DOMString;
  635. u8 : UTF8String;
  636. buffer : PAnsiChar;
  637. lineCount : Integer;
  638. begin
  639. AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
  640. AItem.Alt := ACollationNode.GetAttribute(s_ALT);
  641. AItem.Base := EvaluateXPathStr('base',ACollationNode);
  642. AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
  643. if AItem.Backwards then
  644. AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
  645. ParseImports(ACollationNode,AItem);
  646. AItem.Rules := nil;
  647. if (AMode = TCldrParserMode.FullParsing) then begin
  648. SetLength(statementList,15);
  649. sal := 0;
  650. statement := @statementList[0];
  651. s := EvaluateXPathStr('suppress_contractions',ACollationNode);
  652. if (s <> '') then begin
  653. if (ParseDeletion(s,statement) > 0) then begin
  654. Inc(sal);
  655. Inc(statement);
  656. end else begin
  657. statement^.Clear();
  658. end;
  659. end;
  660. n := ACollationNode.FindNode(s_CR);
  661. if (n <> nil) then begin
  662. n := (n as TDOMElement).FirstChild;
  663. rulesElement := n as TDOMCDATASection;
  664. s := rulesElement.Data;
  665. u8 := UTF8Encode(s);
  666. c := Length(u8);
  667. buffer := @u8[1];
  668. nextPos := 0;
  669. i := 0;
  670. lineCount := 0;
  671. while (i < c) do begin
  672. statement^.Clear();
  673. if not ParseStatement(buffer,i,c,statement,nextPos,lineCount) then
  674. Break;
  675. i := nextPos;
  676. Inc(statement);
  677. Inc(sal);
  678. if (sal >= Length(statementList)) then begin
  679. SetLength(statementList,(sal*2));
  680. statement := @statementList[(sal-1)];
  681. end;
  682. end;
  683. end;
  684. SetLength(statementList,sal);
  685. AItem.Rules := statementList;
  686. end;
  687. end;
  688. procedure ParseCollationDocumentXML(
  689. ADoc : TDOMDocument;
  690. ACollation : TCldrCollation;
  691. AMode : TCldrParserMode
  692. );
  693. var
  694. n : TDOMNode;
  695. collationsElement : TDOMElement;
  696. i, c : Integer;
  697. item : TCldrCollationItem;
  698. nl : TDOMNodeList;
  699. begin
  700. n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
  701. if (n = nil) then
  702. raise Exception.Create(sCollationsNodeNotFound);
  703. collationsElement := n as TDOMElement;
  704. ACollation.Clear();
  705. ACollation.Mode := AMode;
  706. ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
  707. ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
  708. ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement);
  709. if collationsElement.HasChildNodes() then begin
  710. nl := collationsElement.ChildNodes;
  711. c := nl.Count;
  712. item := nil;
  713. try
  714. for i := 0 to c - 1 do begin
  715. n := nl[i];
  716. if (n.NodeName = s_COLLATION) then begin
  717. item := TCldrCollationItem.Create();
  718. ParseCollationItemXML((n as TDOMElement),item,AMode);
  719. ACollation.Add(item);
  720. item := nil;
  721. end
  722. end;
  723. except
  724. FreeAndNil(item);
  725. raise;
  726. end;
  727. end;
  728. end;
  729. procedure ParseCollationDocumentXML(
  730. ADoc : TDOMDocument;
  731. ACollation : TCldrCollationItem;
  732. AType : string
  733. );
  734. var
  735. xv : TXPathVariable;
  736. begin
  737. xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
  738. try
  739. if (xv.AsNodeSet.Count = 0) then
  740. raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
  741. ACollation.Clear();
  742. ParseCollationItemXML((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
  743. finally
  744. xv.Free();
  745. end
  746. end;
  747. procedure ParseCollationDocument2(
  748. ADoc : TDOMDocument;
  749. ACollation : TCldrCollation;
  750. AMode : TCldrParserMode
  751. );
  752. var
  753. n : TDOMNode;
  754. collationsElement : TDOMElement;
  755. i, c : Integer;
  756. item : TCldrCollationItem;
  757. nl : TDOMNodeList;
  758. begin
  759. n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
  760. if (n = nil) then
  761. raise Exception.Create(sCollationsNodeNotFound);
  762. collationsElement := n as TDOMElement;
  763. ACollation.Clear();
  764. ACollation.Mode := AMode;
  765. ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
  766. ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
  767. ACollation.DefaultType := EvaluateXPathStr('collations/defaultCollation',ADoc.DocumentElement);
  768. if collationsElement.HasChildNodes() then begin
  769. nl := collationsElement.ChildNodes;
  770. c := nl.Count;
  771. item := nil;
  772. try
  773. for i := 0 to c - 1 do begin
  774. n := nl[i];
  775. if (n.NodeName = s_COLLATION) then begin
  776. item := TCldrCollationItem.Create();
  777. ParseCollationItem2((n as TDOMElement),item,AMode);
  778. ACollation.Add(item);
  779. item := nil;
  780. end
  781. end;
  782. except
  783. FreeAndNil(item);
  784. raise;
  785. end;
  786. end;
  787. end;
  788. procedure ParseCollationDocument2(
  789. ADoc : TDOMDocument;
  790. ACollation : TCldrCollationItem;
  791. AType : string
  792. );
  793. var
  794. xv : TXPathVariable;
  795. begin
  796. xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
  797. try
  798. if (xv.AsNodeSet.Count = 0) then
  799. raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
  800. ACollation.Clear();
  801. ParseCollationItem2((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
  802. finally
  803. xv.Free();
  804. end
  805. end;
  806. function ReadXMLFile(f: TStream) : TXMLDocument;
  807. var
  808. src : TXMLInputSource;
  809. parser: TDOMParser;
  810. begin
  811. src := TXMLInputSource.Create(f);
  812. Result := TXMLDocument.Create;
  813. parser := TDOMParser.Create();
  814. try
  815. parser.Options.IgnoreComments := True;
  816. parser.Parse(src, Result);
  817. finally
  818. src.Free();
  819. parser.Free;
  820. end;
  821. end;
  822. function ReadXMLFile(const AFilename: String) : TXMLDocument;
  823. var
  824. FileStream: TStream;
  825. begin
  826. Result := nil;
  827. FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  828. try
  829. Result := ReadXMLFile(FileStream);
  830. finally
  831. FileStream.Free;
  832. end;
  833. end;
  834. procedure ParseCollationDocumentXML(
  835. const AFileName : string;
  836. ACollation : TCldrCollation;
  837. AMode : TCldrParserMode
  838. );
  839. var
  840. doc : TXMLDocument;
  841. begin
  842. doc := ReadXMLFile(AFileName);
  843. try
  844. ParseCollationDocumentXML(doc,ACollation,AMode);
  845. ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
  846. finally
  847. doc.Free();
  848. end;
  849. end;
  850. procedure ParseCollationDocumentXML(
  851. const AFileName : string;
  852. ACollation : TCldrCollationItem;
  853. AType : string
  854. );
  855. var
  856. doc : TXMLDocument;
  857. begin
  858. doc := ReadXMLFile(AFileName);
  859. try
  860. ParseCollationDocumentXML(doc,ACollation,AType);
  861. finally
  862. doc.Free();
  863. end;
  864. end;
  865. procedure ParseCollationDocument2(
  866. const AFileName : string;
  867. ACollation : TCldrCollation;
  868. AMode : TCldrParserMode
  869. );
  870. var
  871. doc : TXMLDocument;
  872. begin
  873. doc := ReadXMLFile(AFileName);
  874. try
  875. ParseCollationDocument2(doc,ACollation,AMode);
  876. ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
  877. finally
  878. doc.Free();
  879. end;
  880. end;
  881. procedure ParseCollationDocument2(
  882. AStream : TStream;
  883. ACollation : TCldrCollation;
  884. AMode : TCldrParserMode
  885. );
  886. var
  887. doc : TXMLDocument;
  888. begin
  889. doc := ReadXMLFile(AStream);
  890. try
  891. ParseCollationDocument2(doc,ACollation,AMode);
  892. finally
  893. doc.Free();
  894. end;
  895. end;
  896. procedure ParseCollationDocument2(
  897. const AFileName : string;
  898. ACollation : TCldrCollationItem;
  899. AType : string
  900. );
  901. var
  902. doc : TXMLDocument;
  903. begin
  904. doc := ReadXMLFile(AFileName);
  905. try
  906. ParseCollationDocument2(doc,ACollation,AType);
  907. finally
  908. doc.Free();
  909. end;
  910. end;
  911. procedure ParseCollationDocument2(
  912. AStream : TStream;
  913. ACollation : TCldrCollationItem;
  914. AType : string
  915. );
  916. var
  917. doc : TXMLDocument;
  918. begin
  919. doc := ReadXMLFile(AStream);
  920. try
  921. ParseCollationDocument2(doc,ACollation,AType);
  922. finally
  923. doc.Free();
  924. end;
  925. end;
  926. { TCldrCollationStreamLoader }
  927. procedure TCldrCollationStreamLoader.CheckContent(ALanguage: string);
  928. begin
  929. if not FileExists(ALanguage) then
  930. raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
  931. end;
  932. function TCldrCollationStreamLoader.IndexOf(ALanguage: string): Integer;
  933. var
  934. i : Integer;
  935. begin
  936. for i := Low(FLanguages) to High(FLanguages) do begin
  937. if (FLanguages[i] = ALanguage) then begin
  938. Result := i;
  939. exit;
  940. end;
  941. end;
  942. Result := -1;
  943. end;
  944. procedure TCldrCollationStreamLoader.LoadCollation(
  945. const ALanguage : string;
  946. ACollation : TCldrCollation;
  947. AMode : TCldrParserMode
  948. );
  949. var
  950. i : Integer;
  951. locStream : TStream;
  952. begin
  953. i := IndexOf(ALanguage);
  954. if (i < 0) then
  955. CheckContent(ALanguage);
  956. locStream := FStreams[i];
  957. locStream.Position := 0;
  958. ParseCollationDocument2(locStream,ACollation,AMode);
  959. end;
  960. procedure TCldrCollationStreamLoader.LoadCollationType(
  961. const ALanguage,
  962. ATypeName : string;
  963. AType : TCldrCollationItem
  964. );
  965. var
  966. i : Integer;
  967. locStream : TStream;
  968. begin
  969. i := IndexOf(ALanguage);
  970. if (i < 0) then
  971. CheckContent(ALanguage);
  972. locStream := FStreams[i];
  973. locStream.Position := 0;
  974. ParseCollationDocument2(locStream,AType,ATypeName);
  975. end;
  976. constructor TCldrCollationStreamLoader.Create(
  977. const ALanguages : array of string;
  978. const AStreams : array of TStream
  979. );
  980. var
  981. c, i : Integer;
  982. begin
  983. c := Length(ALanguages);
  984. if (Length(AStreams) < c) then
  985. c := Length(AStreams);
  986. SetLength(FLanguages,c);
  987. SetLength(FStreams,c);
  988. for i := Low(ALanguages) to High(ALanguages) do begin
  989. FLanguages[i] := ALanguages[i];
  990. FStreams[i] := AStreams[i];
  991. end;
  992. end;
  993. destructor TCldrCollationStreamLoader.Destroy();
  994. var
  995. i : Integer;
  996. begin
  997. for i := Low(FStreams) to High(FStreams) do
  998. FreeAndNil(FStreams[i]);
  999. end;
  1000. { TCldrCollationFileLoader }
  1001. procedure TCldrCollationFileLoader.SetPath(APath: string);
  1002. var
  1003. s : string;
  1004. begin
  1005. if (APath = '') then
  1006. s := ''
  1007. else
  1008. s := IncludeLeadingPathDelimiter(APath);
  1009. if (s <> FPath) then
  1010. FPath := s;
  1011. end;
  1012. function TCldrCollationFileLoader.BuildFileName(ALanguage: string): string;
  1013. begin
  1014. Result := Format('%s%s.xml',[FPath,ALanguage]);
  1015. end;
  1016. procedure TCldrCollationFileLoader.CheckFile(AFileName: string);
  1017. begin
  1018. if not FileExists(AFileName) then
  1019. raise EFOpenError.CreateFmt(SFOpenError,[AFileName]);
  1020. end;
  1021. procedure TCldrCollationFileLoader.LoadCollation(
  1022. const ALanguage : string;
  1023. ACollation : TCldrCollation;
  1024. AMode : TCldrParserMode
  1025. );
  1026. var
  1027. locFileName : string;
  1028. begin
  1029. locFileName := BuildFileName(ALanguage);
  1030. CheckFile(locFileName);
  1031. ACollation.Clear();
  1032. ParseCollationDocument2(locFileName,ACollation,AMode);
  1033. end;
  1034. procedure TCldrCollationFileLoader.LoadCollationType(
  1035. const ALanguage,
  1036. ATypeName : string;
  1037. AType : TCldrCollationItem
  1038. );
  1039. var
  1040. locFileName : string;
  1041. begin
  1042. locFileName := BuildFileName(ALanguage);
  1043. CheckFile(locFileName);
  1044. AType.Clear();
  1045. ParseCollationDocument2(locFileName,AType,ATypeName);
  1046. end;
  1047. constructor TCldrCollationFileLoader.Create(APath: string);
  1048. begin
  1049. SetPath(APath);
  1050. end;
  1051. end.