123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686 |
- { Parser of the CLDR collation xml files.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
- The source code is distributed under the Library GNU
- General Public License with the following modification:
- - object files and libraries linked into an application may be
- distributed without source code.
- If you didn't receive a copy of the file COPYING, contact:
- Free Software Foundation
- 675 Mass Ave
- Cambridge, MA 02139
- USA
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- }
- unit cldrxml;
- {$mode objfpc}{$H+}
- {$TypedAddress on}
- interface
- uses
- Classes, SysUtils, DOM,
- cldrhelper;
- procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
- procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
- procedure ParseCollationDocument(
- ADoc : TDOMDocument;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );overload;
- procedure ParseCollationDocument(
- const AFileName : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );overload;
- procedure ParseCollationDocument(
- const AFileName : string;
- ACollation : TCldrCollationItem;
- AType : string
- );overload;
- procedure ParseCollationDocument(
- ADoc : TDOMDocument;
- ACollation : TCldrCollationItem;
- AType : string
- );overload;
- resourcestring
- sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
- sCodePointExpected = 'Code Point node expected as child at this position "%d".';
- sCollationsNodeNotFound = '"collations" node not found.';
- sCollationTypeNotFound = 'collation "Type" not found : "%s".';
- sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
- sInvalidResetClause = 'Invalid "Reset" clause.';
- sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
- sRulesNodeNotFound = '"rules" node not found.';
- sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".';
- sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".';
- sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".';
- implementation
- uses
- typinfo, XMLRead, XPath, Helper, unicodeset;
- const
- s_AT = 'at';
- //s_BEFORE = 'before';
- s_CODEPOINT = 'codepoint';
- s_COLLATION = 'collation';
- s_COLLATIONS = 'collations';
- s_CONTEXT = 'context';
- //s_DEFAULT = 'default';
- s_EXTEND = 'extend';
- s_HEX = 'hex';
- s_POSITION = 'position';
- s_RESET = 'reset';
- s_RULES = 'rules';
- //s_STANDART = 'standard';
- s_TYPE = 'type';
- procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
- begin
- if (ANode.NodeName <> AExpectedName) then
- raise Exception.CreateFmt(sNodeNameAssertMessage,[AExpectedName,ANode.NodeName]);
- end;
- function CharToReorderWeigthKind(const AChar : Char) : TReorderWeigthKind;inline;
- begin
- case AChar of
- 'p' : Result := TReorderWeigthKind.PriMary;
- 's' : Result := TReorderWeigthKind.Secondary;
- 't' : Result := TReorderWeigthKind.Tertiary;
- 'i' : Result := TReorderWeigthKind.Identity;
- else
- Result := TReorderWeigthKind.Identity;
- end;
- end;
- function DomString2UnicodeCodePointArray(const AValue : DOMString): TUnicodeCodePointArray;
- var
- u4str : UCS4String;
- k : Integer;
- begin
- if (Length(AValue) = 0) then
- exit(nil);
- if (Length(AValue) = 1) then begin
- SetLength(Result,1);
- Result[0] := Ord(AValue[1])
- end else begin
- u4str := WideStringToUCS4String(AValue);
- k := Length(u4str) - 1; // remove the last #0
- SetLength(Result,k);
- for k := 0 to k - 1 do
- Result[k] := u4str[k];
- end;
- end;
- function TryStrToLogicalReorder(
- const AValue : string;
- out AResult : TReorderLogicalReset
- ) : Boolean;
- var
- s : string;
- i : Integer;
- begin
- s := StringReplace(AValue,' ','',[rfReplaceAll]);
- s := StringReplace(s,'_','',[rfReplaceAll]);
- i := GetEnumValue(TypeInfo(TReorderLogicalReset),s);
- Result := (i > -1);
- if Result then
- AResult := TReorderLogicalReset(i);
- end;
- function ParseStatement(
- ARules : TDOMElement;
- AStartPosition : Integer;
- AStatement : PReorderSequence;
- var ANextPos : Integer
- ) : Boolean;
- var
- startPosition : Integer;
- statement : PReorderSequence;
- elementActualCount : Integer;
- list : TDOMNodeList;
- inBlock : Boolean;
- procedure SkipComments();
- begin
- while (startPosition < list.Count) do begin
- if (list[startPosition].NodeType <> COMMENT_NODE) then
- Break;
- Inc(startPosition);
- end;
- end;
- function parse_reset() : Integer;
- var
- n, t : TDOMNode;
- s : string;
- logicalPos : TReorderLogicalReset;
- begin
- SkipComments();
- n := list[startPosition];
- CheckNodeName(n,s_RESET);
- if n.HasChildNodes() then begin
- n := n.FirstChild;
- if (n.NodeType = TEXT_NODE) then begin
- statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(n).Data));
- Result := startPosition+1;
- end else begin
- if not TryStrToLogicalReorder(n.NodeName,logicalPos) then
- raise Exception.CreateFmt(sUnknownResetLogicalPosition,[n.NodeName]);
- statement^.LogicalPosition := logicalPos;
- Result := startPosition+1;
- end;
- end else if not n.HasChildNodes() then begin
- if (list[startPosition+1].NodeName = s_POSITION) then begin
- s := list[startPosition+1].Attributes.GetNamedItem(s_AT).NodeValue;
- if not TryStrToLogicalReorder(s,logicalPos) then
- raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
- statement^.LogicalPosition := logicalPos;
- Result := startPosition+2;
- end else begin
- t := list[startPosition+1];
- {if (t.NodeType <> TEXT_NODE) then
- raise Exception.CreateFmt(sTextNodeChildExpected,[(startPosition+1),(t.NodeName+'('+t.ClassName+')')]);}
- if (t.NodeType = TEXT_NODE) then
- statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(t).Data))
- else
- statement^.Reset := DomString2UnicodeCodePointArray(' ');
- Result := startPosition+2;
- end;
- end;
- if (statement^.LogicalPosition = TReorderLogicalReset.None) and
- (Length(statement^.Reset) = 0)
- then
- raise Exception.Create(sInvalidResetClause);
- end;
- procedure EnsureElementLength(const ALength : Integer);
- var
- k, d : Integer;
- begin
- k := Length(statement^.Elements);
- if (k < ALength) then begin
- k := ALength;
- if (k = 0) then begin
- k := 50;
- end else begin
- if (k < 10) then
- d := 10
- else
- d := 2;
- k := k * d;
- end;
- SetLength(statement^.Elements,k);
- end;
- end;
- procedure AddElement(
- const AChars : array of UCS4Char;
- const AWeigthKind : TReorderWeigthKind;
- const AContext : DOMString
- );overload;
- var
- kp : PReorderUnit;
- k : Integer;
- begin
- EnsureElementLength(elementActualCount+1);
- kp := @statement^.Elements[elementActualCount];
- SetLength(kp^.Characters,Length(AChars));
- for k := 0 to Length(AChars) - 1 do
- kp^.Characters[k] := AChars[k];
- kp^.WeigthKind := AWeigthKind;
- elementActualCount := elementActualCount + 1;
- if (AContext <> '') then
- kp^.Context := DomString2UnicodeCodePointArray(AContext);
- end;
- procedure ReadChars(
- ANode : TDOMNode;
- APos : Integer;
- var AChars : UCS4String
- );
- var
- t : TDOMNode;
- u4str : UCS4String;
- s : DOMString;
- begin
- if not ANode.HasChildNodes() then begin
- SetLength(AChars,1);
- AChars[0] := Ord(UnicodeChar(' '));
- exit;
- //raise Exception.CreateFmt(sCodePointExpected + ANode.ClassName,[APos]);
- end;
- t := ANode.FindNode(s_CODEPOINT);
- if (t = nil) then begin
- if (ANode.ChildNodes.Count <> 1) then
- raise Exception.CreateFmt(sUniqueChildNodeExpected,[APos]);
- t := ANode.ChildNodes[0];
- if not t.InheritsFrom(TDOMText) then
- raise Exception.CreateFmt(sTextNodeChildExpected,[APos,(t.NodeName+'('+t.ClassName+')')]);
- s := TDOMText(t).Data;
- if (Length(s) = 1) then begin
- SetLength(AChars,1);
- AChars[0] := Ord(s[1]);
- end else begin
- u4str := WideStringToUCS4String(s);
- AChars := u4str;
- SetLength(AChars,Length(AChars)-1);
- end;
- end else begin
- t := t.Attributes.GetNamedItem(s_HEX);
- if (t = nil) then
- raise Exception.CreateFmt(sHexAttributeExpected,[APos]);
- SetLength(AChars,1);
- AChars[0] := StrToInt('$'+t.NodeValue);
- end
- end;
- procedure AddPrefixChars(const APrefix : array of UCS4Char; var ADest : TUnicodeCodePointArray);
- var
- k : Integer;
- begin
- k := Length(ADest);
- SetLength(ADest,(k+Length(APrefix)));
- Move(ADest[0],ADest[k+1],(SizeOf(k*ADest[0])));
- for k := 0 to k - 1 do
- ADest[k] := APrefix[k];
- end;
- function ReadNextItem(const APos : Integer) : Integer;
- var
- n, t : TDOMNode;
- contextStr : DOMString;
- w : TReorderWeigthKind;
- isSimpleCharTag : Boolean;
- simpleCharTag : AnsiChar;
- last : PReorderUnit;
- u4str : UCS4String;
- k : Integer;
- begin
- contextStr := '';
- Result := APos;
- n := list[APos];
- isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
- if isSimpleCharTag then begin
- simpleCharTag := AnsiChar(n.NodeName[1]);
- if (simpleCharTag = 'x') then begin
- inBlock := True;
- n := n.FirstChild;
- if (n.NodeName = s_CONTEXT) then begin
- if n.HasChildNodes() then begin
- t := n.FirstChild;
- if (t.NodeType = TEXT_NODE) then
- contextStr := TDOMText(t).Data;
- end;
- n := n.NextSibling;
- end;
- isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
- if isSimpleCharTag then
- simpleCharTag := AnsiChar(n.NodeName[1]);
- end;
- end;
- if isSimpleCharTag and (simpleCharTag in ['p','s','t','i']) then begin
- w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
- ReadChars(n,APos,u4str);
- AddElement(u4str,w,contextStr);
- Result := Result + 1;
- if not inBlock then
- exit;
- last := @statement^.Elements[elementActualCount-1];
- n := n.NextSibling;
- if (n <> nil) and (n.NodeName = s_EXTEND) then begin
- ReadChars(n,APos,u4str);
- SetLength(last^.ExpansionChars,Length(u4str));
- for k := 0 to Length(u4str) - 1 do
- last^.ExpansionChars[k] := u4str[k];
- end;
- exit;
- end;
- if (Length(n.NodeName) = 2) and (n.NodeName[2] = 'c') and
- (Ord(n.NodeName[1])<=127) and (AnsiChar(n.NodeName[1]) in ['p','s','t','i'])
- then begin
- w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
- ReadChars(n,APos,u4str);
- for k := Low(u4str) to High(u4str) do
- AddElement(u4str[k],w,contextStr);
- Result := Result + 1;
- exit;
- end;
- raise Exception.CreateFmt(sCaseNothandled,[n.NodeName,APos]);
- end;
- var
- i, c : Integer;
- n : TDOMNode;
- begin
- Result := False;
- inBlock := False;
- elementActualCount := 0;
- if (AStartPosition <= 0) then
- startPosition := 0
- else
- startPosition := AStartPosition;
- i := startPosition;
- list := ARules.ChildNodes;
- c := list.Count;
- if (c <= i) then
- exit;
- statement := AStatement;
- statement^.Clear();
- n := list[i];
- i := parse_reset();
- while (i < c) do begin
- n := list[i];
- if (n.NodeName = s_RESET) then
- Break;
- i := ReadNextItem(i);
- end;
- SetLength(statement^.Elements,elementActualCount);
- Result := (i > startPosition);
- if Result then
- ANextPos := i;
- end;
- procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);
- var
- n : TDOMNode;
- rulesElement : TDOMElement;
- i, c, nextPost : Integer;
- statement : TReorderSequence;
- p : PReorderUnit;
- begin
- n := ADoc.DocumentElement.FindNode(s_RULES);
- if (n = nil) then
- raise Exception.Create(sRulesNodeNotFound);
- rulesElement := n as TDOMElement;
- c := rulesElement.ChildNodes.Count;
- ASequence^.Clear();
- SetLength(ASequence^.Data,c+100);
- nextPost := 0;
- i := 0;
- while (i < c) do begin
- statement.Clear();
- if not ParseStatement(rulesElement,i,@statement,nextPost) then
- Break;
- i := nextPost;
- try
- ASequence^.ApplyStatement(@statement);
- except
- on e : Exception do begin
- e.Message := Format('%s Position = %d',[e.Message,i]);
- raise;
- end;
- end;
- end;
- if (ASequence^.ActualLength > 0) then begin
- p := @ASequence^.Data[0];
- for i := 0 to ASequence^.ActualLength - 1 do begin
- p^.Changed := False;
- Inc(p);
- end;
- end;
- end;
- procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);
- var
- doc : TXMLDocument;
- begin
- ReadXMLFile(doc,AFileName);
- try
- ParseInitialDocument(ASequence,doc);
- finally
- doc.Free();
- end;
- end;
- function EvaluateXPathStr(const AExpression : string; AContextNode : TDOMNode): DOMString;
- var
- xv : TXPathVariable;
- begin
- xv := EvaluateXPathExpression(AExpression,AContextNode);
- try
- if (xv <> nil) then
- Result := xv.AsText
- else
- Result := '';
- finally
- xv.Free();
- end;
- end;
- function ParseDeletion(
- const APattern : DOMString;
- ASequence : PReorderSequence
- ) : Integer;
- var
- r : array of TReorderUnit;
- c, i : Integer;
- uset : TUnicodeSet;
- it : TUnicodeSet.TIterator;
- p : PReorderUnit;
- begin
- if (APattern = '') then
- exit(0);
- it := nil;
- uset := TUnicodeSet.Create();
- try
- uset.AddPattern(APattern);
- it := uset.CreateIterator();
- c := 0;
- it.Reset();
- while it.MoveNext() do begin
- Inc(c);
- end;
- SetLength(r,c);
- p := @r[0];
- i := 0;
- it.Reset();
- while it.MoveNext() do begin
- p^.Clear();
- p^.WeigthKind := TReorderWeigthKind.Deletion;
- p^.Characters := Copy(it.GetCurrent());
- Inc(p);
- Inc(i);
- end;
- ASequence^.Clear();
- ASequence^.Elements := r;
- finally
- it.Free();
- uset.Free();
- end;
- SetLength(r,0);
- end;
- procedure ParseCollationItem(
- ACollationNode : TDOMElement;
- AItem : TCldrCollationItem;
- AMode : TCldrParserMode
- );
- var
- n : TDOMNode;
- rulesElement : TDOMElement;
- i, c, nextPos : Integer;
- statementList : TReorderSequenceArray;
- sal : Integer;//statement actual length
- statement : PReorderSequence;
- s : DOMString;
- begin
- AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
- AItem.Base := EvaluateXPathStr('base',ACollationNode);
- AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
- if AItem.Backwards then
- AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
- AItem.Rules := nil;
- if (AMode = TCldrParserMode.FullParsing) then begin
- SetLength(statementList,15);
- sal := 0;
- statement := @statementList[0];
- s := EvaluateXPathStr('suppress_contractions',ACollationNode);
- if (s <> '') then begin
- if (ParseDeletion(s,statement) > 0) then begin
- Inc(sal);
- Inc(statement);
- end else begin
- statement^.Clear();
- end;
- end;
- n := ACollationNode.FindNode(s_RULES);
- if (n <> nil) then begin
- rulesElement := n as TDOMElement;
- c := rulesElement.ChildNodes.Count;
- nextPos := 0;
- i := 0;
- while (i < c) do begin
- statement^.Clear();
- if not ParseStatement(rulesElement,i,statement,nextPos) then
- Break;
- i := nextPos;
- Inc(statement);
- Inc(sal);
- if (sal >= Length(statementList)) then begin
- SetLength(statementList,(sal*2));
- statement := @statementList[(sal-1)];
- end;
- end;
- end;
- SetLength(statementList,sal);
- AItem.Rules := statementList;
- end;
- end;
- procedure ParseCollationDocument(
- ADoc : TDOMDocument;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- n : TDOMNode;
- collationsElement : TDOMElement;
- i, c : Integer;
- item : TCldrCollationItem;
- nl : TDOMNodeList;
- begin
- n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
- if (n = nil) then
- raise Exception.Create(sCollationsNodeNotFound);
- collationsElement := n as TDOMElement;
- ACollation.Clear();
- ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
- ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
- ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement);
- if collationsElement.HasChildNodes() then begin
- nl := collationsElement.ChildNodes;
- c := nl.Count;
- item := nil;
- try
- for i := 0 to c - 1 do begin
- n := nl[i];
- if (n.NodeName = s_COLLATION) then begin
- item := TCldrCollationItem.Create();
- ParseCollationItem((n as TDOMElement),item,AMode);
- ACollation.Add(item);
- item := nil;
- end
- end;
- except
- FreeAndNil(item);
- raise;
- end;
- end;
- end;
- procedure ParseCollationDocument(
- ADoc : TDOMDocument;
- ACollation : TCldrCollationItem;
- AType : string
- );
- var
- xv : TXPathVariable;
- begin
- xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
- try
- if (xv.AsNodeSet.Count = 0) then
- raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
- ACollation.Clear();
- ParseCollationItem((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
- finally
- xv.Free();
- end
- end;
- function ReadXMLFile(f: TStream) : TXMLDocument;
- var
- src : TXMLInputSource;
- parser: TDOMParser;
- begin
- src := TXMLInputSource.Create(f);
- Result := TXMLDocument.Create;
- parser := TDOMParser.Create();
- try
- parser.Options.IgnoreComments := True;
- parser.Parse(src, Result);
- finally
- src.Free();
- parser.Free;
- end;
- end;
- function ReadXMLFile(const AFilename: String) : TXMLDocument;
- var
- FileStream: TStream;
- begin
- Result := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- Result := ReadXMLFile(FileStream);
- finally
- FileStream.Free;
- end;
- end;
- procedure ParseCollationDocument(
- const AFileName : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- doc : TXMLDocument;
- begin
- doc := ReadXMLFile(AFileName);
- try
- ParseCollationDocument(doc,ACollation,AMode);
- ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
- finally
- doc.Free();
- end;
- end;
- procedure ParseCollationDocument(
- const AFileName : string;
- ACollation : TCldrCollationItem;
- AType : string
- );
- var
- doc : TXMLDocument;
- begin
- doc := ReadXMLFile(AFileName);
- try
- ParseCollationDocument(doc,ACollation,AType);
- finally
- doc.Free();
- end;
- end;
- end.
|