1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207 |
- { Parser of the CLDR collation xml files.
- Copyright (c) 2013, 2014, 2015 by Inoussa OUEDRAOGO
- The source code is distributed under the Library GNU
- General Public License with the following modification:
- - object files and libraries linked into an application may be
- distributed without source code.
- If you didn't receive a copy of the file COPYING, contact:
- Free Software Foundation
- 675 Mass Ave
- Cambridge, MA 02139
- USA
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- }
- { The procedure whoses names lasted by 'XML' (ParseInitialDocumentXML,
- ParseCollationDocumentXML, ...) are for older CLDR versions (CDLR <= 23); The
- old version was unsing a XML syntax for collation's rules specifications.
- The new versions (and going forward) will be using the text syntax.
- }
- unit cldrxml;
- {$mode delphi}{$H+}
- {$TypedAddress on}
- interface
- uses
- Classes, SysUtils, DOM,
- cldrhelper;
- type
- { TCldrCollationFileLoader }
- TCldrCollationFileLoader = class(TInterfacedObject,ICldrCollationLoader)
- private
- FPath : string;
- private
- procedure SetPath(APath : string);
- function BuildFileName(ALanguage : string) : string;
- procedure CheckFile(AFileName : string);
- protected
- procedure LoadCollation(
- const ALanguage : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- procedure LoadCollationType(
- const ALanguage,
- ATypeName : string;
- AType : TCldrCollationItem
- );
- public
- constructor Create(APath : string);
- end;
- { TCldrCollationStreamLoader }
- TCldrCollationStreamLoader = class(TInterfacedObject,ICldrCollationLoader)
- private
- FLanguages : array of string;
- FStreams : array of TStream;
- private
- procedure CheckContent(ALanguage : string);
- function IndexOf(ALanguage : string) : Integer;
- protected
- procedure LoadCollation(
- const ALanguage : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- procedure LoadCollationType(
- const ALanguage,
- ATypeName : string;
- AType : TCldrCollationItem
- );
- public
- constructor Create(
- const ALanguages : array of string;
- const AStreams : array of TStream
- );
- destructor Destroy();override;
- end;
- procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
- procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);overload;
- //-----------------------------------------------------
- procedure ParseCollationDocument2(
- ADoc : TDOMDocument;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );overload;
- procedure ParseCollationDocument2(
- const AFileName : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );overload;
- procedure ParseCollationDocument2(
- AStream : TStream;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );overload;
- procedure ParseCollationDocument2(
- const AFileName : string;
- ACollation : TCldrCollationItem;
- AType : string
- );overload;
- procedure ParseCollationDocument2(
- ADoc : TDOMDocument;
- ACollation : TCldrCollationItem;
- AType : string
- );overload;
- procedure ParseCollationDocument2(
- AStream : TStream;
- ACollation : TCldrCollationItem;
- AType : string
- );overload;
- implementation
- uses
- typinfo, RtlConsts, XMLRead, XPath, Helper, unicodeset, cldrtxt;
- const
- s_ALT = 'alt';
- s_AT = 'at';
- //s_BEFORE = 'before';
- s_CODEPOINT = 'codepoint';
- s_COLLATION = 'collation';
- s_COLLATIONS = 'collations';
- s_CONTEXT = 'context';
- //s_DEFAULT = 'default';
- s_EXTEND = 'extend';
- s_HEX = 'hex';
- s_POSITION = 'position';
- s_RESET = 'reset';
- s_RULES = 'rules';
- //s_STANDART = 'standard';
- s_TYPE = 'type';
- s_CR = 'cr';
- procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
- begin
- if (ANode.NodeName <> AExpectedName) then
- raise Exception.CreateFmt(sNodeNameAssertMessage,[AExpectedName,ANode.NodeName]);
- end;
- function CharToReorderWeigthKind(const AChar : Char) : TReorderWeigthKind;inline;
- begin
- case AChar of
- 'p' : Result := TReorderWeigthKind.PriMary;
- 's' : Result := TReorderWeigthKind.Secondary;
- 't' : Result := TReorderWeigthKind.Tertiary;
- 'i' : Result := TReorderWeigthKind.Identity;
- else
- Result := TReorderWeigthKind.Identity;
- end;
- end;
- function DomString2UnicodeCodePointArray(const AValue : DOMString): TUnicodeCodePointArray;
- var
- u4str : UCS4String;
- k : Integer;
- begin
- if (Length(AValue) = 0) then
- exit(nil);
- if (Length(AValue) = 1) then begin
- SetLength(Result,1);
- Result[0] := Ord(AValue[1])
- end else begin
- u4str := WideStringToUCS4String(AValue);
- k := Length(u4str) - 1; // remove the last #0
- SetLength(Result,k);
- for k := 0 to k - 1 do
- Result[k] := u4str[k];
- end;
- end;
- function ParseStatementXML(
- ARules : TDOMElement;
- AStartPosition : Integer;
- AStatement : PReorderSequence;
- var ANextPos : Integer
- ) : Boolean;
- var
- startPosition : Integer;
- statement : PReorderSequence;
- elementActualCount : Integer;
- list : TDOMNodeList;
- inBlock : Boolean;
- procedure SkipComments();
- begin
- while (startPosition < list.Count) do begin
- if (list[startPosition].NodeType <> COMMENT_NODE) then
- Break;
- Inc(startPosition);
- end;
- end;
- function parse_reset() : Integer;
- var
- n, t : TDOMNode;
- s : string;
- logicalPos : TReorderLogicalReset;
- begin
- SkipComments();
- n := list[startPosition];
- CheckNodeName(n,s_RESET);
- if n.HasChildNodes() then begin
- n := n.FirstChild;
- if (n.NodeType = TEXT_NODE) then begin
- statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(n).Data));
- Result := startPosition+1;
- end else begin
- if not TryStrToLogicalReorder(n.NodeName,logicalPos) then
- raise Exception.CreateFmt(sUnknownResetLogicalPosition,[n.NodeName]);
- statement^.LogicalPosition := logicalPos;
- Result := startPosition+1;
- end;
- end else if not n.HasChildNodes() then begin
- if (list[startPosition+1].NodeName = s_POSITION) then begin
- s := list[startPosition+1].Attributes.GetNamedItem(s_AT).NodeValue;
- if not TryStrToLogicalReorder(s,logicalPos) then
- raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
- statement^.LogicalPosition := logicalPos;
- Result := startPosition+2;
- end else begin
- t := list[startPosition+1];
- {if (t.NodeType <> TEXT_NODE) then
- raise Exception.CreateFmt(sTextNodeChildExpected,[(startPosition+1),(t.NodeName+'('+t.ClassName+')')]);}
- if (t.NodeType = TEXT_NODE) then
- statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(t).Data))
- else
- statement^.Reset := DomString2UnicodeCodePointArray(' ');
- Result := startPosition+2;
- end;
- end;
- if (statement^.LogicalPosition = TReorderLogicalReset.None) and
- (Length(statement^.Reset) = 0)
- then
- raise Exception.Create(sInvalidResetClause);
- end;
- procedure EnsureElementLength(const ALength : Integer);
- var
- k, d : Integer;
- begin
- k := Length(statement^.Elements);
- if (k < ALength) then begin
- k := ALength;
- if (k = 0) then begin
- k := 50;
- end else begin
- if (k < 10) then
- d := 10
- else
- d := 2;
- k := k * d;
- end;
- SetLength(statement^.Elements,k);
- end;
- end;
- procedure AddElement(
- const AChars : array of UCS4Char;
- const AWeigthKind : TReorderWeigthKind;
- const AContext : DOMString
- );overload;
- var
- kp : PReorderUnit;
- k : Integer;
- begin
- EnsureElementLength(elementActualCount+1);
- kp := @statement^.Elements[elementActualCount];
- SetLength(kp^.Characters,Length(AChars));
- for k := 0 to Length(AChars) - 1 do
- kp^.Characters[k] := AChars[k];
- kp^.WeigthKind := AWeigthKind;
- elementActualCount := elementActualCount + 1;
- if (AContext <> '') then
- kp^.Context := DomString2UnicodeCodePointArray(AContext);
- end;
- procedure ReadChars(
- ANode : TDOMNode;
- APos : Integer;
- var AChars : UCS4String
- );
- var
- t : TDOMNode;
- u4str : UCS4String;
- s : DOMString;
- begin
- if not ANode.HasChildNodes() then begin
- SetLength(AChars,1);
- AChars[0] := Ord(UnicodeChar(' '));
- exit;
- //raise Exception.CreateFmt(sCodePointExpected + ANode.ClassName,[APos]);
- end;
- t := ANode.FindNode(s_CODEPOINT);
- if (t = nil) then begin
- if (ANode.ChildNodes.Count <> 1) then
- raise Exception.CreateFmt(sUniqueChildNodeExpected,[APos]);
- t := ANode.ChildNodes[0];
- if not t.InheritsFrom(TDOMText) then
- raise Exception.CreateFmt(sTextNodeChildExpected,[APos,(t.NodeName+'('+t.ClassName+')')]);
- s := TDOMText(t).Data;
- if (Length(s) = 1) then begin
- SetLength(AChars,1);
- AChars[0] := Ord(s[1]);
- end else begin
- u4str := WideStringToUCS4String(s);
- AChars := u4str;
- SetLength(AChars,Length(AChars)-1);
- end;
- end else begin
- t := t.Attributes.GetNamedItem(s_HEX);
- if (t = nil) then
- raise Exception.CreateFmt(sHexAttributeExpected,[APos]);
- SetLength(AChars,1);
- AChars[0] := StrToInt('$'+t.NodeValue);
- end
- end;
- procedure AddPrefixChars(const APrefix : array of UCS4Char; var ADest : TUnicodeCodePointArray);
- var
- k : Integer;
- begin
- k := Length(ADest);
- SetLength(ADest,(k+Length(APrefix)));
- Move(ADest[0],ADest[k+1],(SizeOf(k*ADest[0])));
- for k := 0 to k - 1 do
- ADest[k] := APrefix[k];
- end;
- function ReadNextItem(const APos : Integer) : Integer;
- var
- n, t : TDOMNode;
- contextStr : DOMString;
- w : TReorderWeigthKind;
- isSimpleCharTag : Boolean;
- simpleCharTag : AnsiChar;
- last : PReorderUnit;
- u4str : UCS4String;
- k : Integer;
- begin
- contextStr := '';
- Result := APos;
- n := list[APos];
- isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
- if isSimpleCharTag then begin
- simpleCharTag := AnsiChar(n.NodeName[1]);
- if (simpleCharTag = 'x') then begin
- inBlock := True;
- n := n.FirstChild;
- if (n.NodeName = s_CONTEXT) then begin
- if n.HasChildNodes() then begin
- t := n.FirstChild;
- if (t.NodeType = TEXT_NODE) then
- contextStr := TDOMText(t).Data;
- end;
- n := n.NextSibling;
- end;
- isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
- if isSimpleCharTag then
- simpleCharTag := AnsiChar(n.NodeName[1]);
- end;
- end;
- if isSimpleCharTag and (simpleCharTag in ['p','s','t','i']) then begin
- w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
- ReadChars(n,APos,u4str);
- AddElement(u4str,w,contextStr);
- Result := Result + 1;
- if not inBlock then
- exit;
- last := @statement^.Elements[elementActualCount-1];
- n := n.NextSibling;
- if (n <> nil) and (n.NodeName = s_EXTEND) then begin
- ReadChars(n,APos,u4str);
- SetLength(last^.ExpansionChars,Length(u4str));
- for k := 0 to Length(u4str) - 1 do
- last^.ExpansionChars[k] := u4str[k];
- end;
- exit;
- end;
- if (Length(n.NodeName) = 2) and (n.NodeName[2] = 'c') and
- (Ord(n.NodeName[1])<=127) and (AnsiChar(n.NodeName[1]) in ['p','s','t','i'])
- then begin
- w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
- ReadChars(n,APos,u4str);
- for k := Low(u4str) to High(u4str) do
- AddElement(u4str[k],w,contextStr);
- Result := Result + 1;
- exit;
- end;
- raise Exception.CreateFmt(sCaseNothandled,[n.NodeName,APos]);
- end;
- var
- i, c : Integer;
- n : TDOMNode;
- begin
- Result := False;
- inBlock := False;
- elementActualCount := 0;
- if (AStartPosition <= 0) then
- startPosition := 0
- else
- startPosition := AStartPosition;
- i := startPosition;
- list := ARules.ChildNodes;
- c := list.Count;
- if (c <= i) then
- exit;
- statement := AStatement;
- statement^.Clear();
- n := list[i];
- i := parse_reset();
- while (i < c) do begin
- n := list[i];
- if (n.NodeName = s_RESET) then
- Break;
- i := ReadNextItem(i);
- end;
- SetLength(statement^.Elements,elementActualCount);
- Result := (i > startPosition);
- if Result then
- ANextPos := i;
- end;
- procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);
- var
- n : TDOMNode;
- rulesElement : TDOMElement;
- i, c, nextPost : Integer;
- statement : TReorderSequence;
- p : PReorderUnit;
- begin
- n := ADoc.DocumentElement.FindNode(s_RULES);
- if (n = nil) then
- raise Exception.Create(sRulesNodeNotFound);
- rulesElement := n as TDOMElement;
- c := rulesElement.ChildNodes.Count;
- ASequence^.Clear();
- SetLength(ASequence^.Data,c+100);
- nextPost := 0;
- i := 0;
- while (i < c) do begin
- statement.Clear();
- if not ParseStatementXML(rulesElement,i,@statement,nextPost) then
- Break;
- i := nextPost;
- try
- ASequence^.ApplyStatement(@statement);
- except
- on e : Exception do begin
- e.Message := Format('%s Position = %d',[e.Message,i]);
- raise;
- end;
- end;
- end;
- if (ASequence^.ActualLength > 0) then begin
- p := @ASequence^.Data[0];
- for i := 0 to ASequence^.ActualLength - 1 do begin
- p^.Changed := False;
- Inc(p);
- end;
- end;
- end;
- procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);
- var
- doc : TXMLDocument;
- begin
- ReadXMLFile(doc,AFileName);
- try
- ParseInitialDocumentXML(ASequence,doc);
- finally
- doc.Free();
- end;
- end;
- function EvaluateXPathStr(const AExpression : string; AContextNode : TDOMNode): DOMString;
- var
- xv : TXPathVariable;
- begin
- xv := EvaluateXPathExpression(AExpression,AContextNode);
- try
- if (xv <> nil) then
- Result := xv.AsText
- else
- Result := '';
- finally
- xv.Free();
- end;
- end;
- function ParseDeletion(
- const APattern : DOMString;
- ASequence : PReorderSequence
- ) : Integer;
- var
- r : array of TReorderUnit;
- c : Integer;
- uset : TUnicodeSet;
- it : TUnicodeSet.TIterator;
- p : PReorderUnit;
- begin
- if (APattern = '') then
- exit(0);
- it := nil;
- uset := TUnicodeSet.Create();
- try
- uset.AddPattern(APattern);
- it := uset.CreateIterator();
- c := 0;
- it.Reset();
- while it.MoveNext() do begin
- Inc(c);
- end;
- SetLength(r,c);
- p := @r[0];
- it.Reset();
- while it.MoveNext() do begin
- p^.Clear();
- p^.WeigthKind := TReorderWeigthKind.Deletion;
- p^.Characters := Copy(it.GetCurrent());
- Inc(p);
- end;
- ASequence^.Clear();
- ASequence^.Elements := r;
- finally
- it.Free();
- uset.Free();
- end;
- r := nil;
- Result := c;
- end;
- function NextPart(
- const ABuffer : string;
- const AStartPos : Integer;
- const ASeparator : Char;
- out ANextStart : Integer
- ) : string;
- var
- c, sp, i : Integer;
- begin
- c := Length(ABuffer);
- if (c < 1) or (AStartPos > c) then begin
- ANextStart := c+1;
- Result := '';
- exit;
- end;
- if (AStartPos > 0) then
- sp := AStartPos
- else
- sp := 1;
- i := sp;
- while (i <= c) do begin
- if (ABuffer[i] = ASeparator) then
- break;
- i := i+1;
- end;
- Result := Copy(ABuffer,sp,(i-sp));
- if (i <= c) then
- i := i+1;
- ANextStart := i;
- end;
- procedure HandleSetting_Import(
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- var
- buffer, lang, col, s : UTF8String;
- i, ns : Integer;
- begin
- if (Length(ASetting^.Values) <> 1) then begin
- buffer := '';
- if (Length(ASetting^.Values) > 0) then begin
- for i := 0 to Length(ASetting^.Values)-1 do
- buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
- end;
- raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
- end;
- buffer := ASetting^.Values[0];
- lang := NextPart(buffer,1,'-',ns);
- i := ns;
- col := '';
- s := NextPart(buffer,i,'-',ns);
- if (s <> '') then begin
- if (s <> 'u') then
- raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
- i := ns;
- s := NextPart(buffer,i,'-',ns);
- if (s <> 'co') then
- raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
- s := Trim(Copy(buffer,ns,(Length(buffer)-ns+1)));
- if (s = '') then
- raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
- col := s;
- end;
- if (col = '') then
- col := COLLATION_ITEM_DEFAULT;
- if (LowerCase(lang) = 'und') then
- lang := 'root';
- AItem.Imports.Add(lang,col);
- ASetting^.Understood := True;
- end;
- procedure HandleSetting_Backwards(
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- var
- buffer : UTF8String;
- i : Integer;
- begin
- if (Length(ASetting^.Values) <> 1) then begin
- buffer := '';
- if (Length(ASetting^.Values) > 0) then begin
- for i := 0 to Length(ASetting^.Values)-1 do
- buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
- end;
- raise Exception.CreateFmt(sInvalidBackwardsStatement,[buffer]);
- end;
- if (ASetting^.Values[0] = '2') then
- AItem.Backwards := True
- else
- raise Exception.CreateFmt(
- sInvalidSettingValue,
- [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
- );
- AItem.ChangedFields := AItem.ChangedFields+[TCollationField.BackWards];
- ASetting^.Understood := True;
- end;
- procedure HandleSetting_Alternate(
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- var
- buffer : UTF8String;
- i : Integer;
- begin
- if (Length(ASetting^.Values) <> 1) then begin
- buffer := '';
- if (Length(ASetting^.Values) > 0) then begin
- for i := 0 to Length(ASetting^.Values)-1 do
- buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
- end;
- raise Exception.CreateFmt(sInvalidAlternateStatement,[buffer]);
- end;
- buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
- if (buffer = 'non-ignorable') then
- AItem.VariableWeight := ucaNonIgnorable
- else if (buffer = 'shifted') then
- AItem.VariableWeight := ucaShifted
- else
- raise Exception.CreateFmt(
- sInvalidSettingValue,
- [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
- );
- AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Alternate];
- ASetting^.Understood := True;
- end;
- procedure HandleSetting_Normalization(
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- var
- buffer : UTF8String;
- i : Integer;
- begin
- if (Length(ASetting^.Values) <> 1) then begin
- buffer := '';
- if (Length(ASetting^.Values) > 0) then begin
- for i := 0 to Length(ASetting^.Values)-1 do
- buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
- end;
- raise Exception.CreateFmt(sInvalidNormalizationStatement,[buffer]);
- end;
- buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
- if (buffer = 'off') then
- AItem.Normalization := False
- else if (buffer = 'on') then
- AItem.Normalization := True
- else
- raise Exception.CreateFmt(
- sInvalidSettingValue,
- [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
- );
- AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Normalization];
- ASetting^.Understood := True;
- end;
- procedure HandleSetting_Strength(
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- var
- buffer : UTF8String;
- i : Integer;
- begin
- if (Length(ASetting^.Values) <> 1) then begin
- buffer := '';
- if (Length(ASetting^.Values) > 0) then begin
- for i := 0 to Length(ASetting^.Values)-1 do
- buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
- end;
- raise Exception.CreateFmt(sInvalidStrengthStatement,[buffer]);
- end;
- buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
- if (buffer = '1') then
- AItem.Strength := TComparisonStrength.Primary
- else if (buffer = '2') then
- AItem.Strength := TComparisonStrength.Secondary
- else if (buffer = '3') then
- AItem.Strength := TComparisonStrength.Tertiary
- else if (buffer = '4') then
- AItem.Strength := TComparisonStrength.Quaternary
- else if (buffer = 'i') then
- AItem.Strength := TComparisonStrength.Identity
- else
- raise Exception.CreateFmt(
- sInvalidSettingValue,
- [SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
- );
- AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Strength];
- ASetting^.Understood := True;
- end;
- procedure HandleSetting_EMPTY_PROC(
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- begin
- //
- end;
- type
- TSettingHandlerProc = procedure (
- AItem : TCldrCollationItem;
- ASetting : PSettingRec
- );
- const
- SETTING_HANDLERS : array[TSettingOption] of TSettingHandlerProc =(
- HandleSetting_EMPTY_PROC, HandleSetting_Strength, HandleSetting_Alternate,
- //Unknown, Strength, Alternate,
- HandleSetting_Backwards, HandleSetting_Normalization, HandleSetting_EMPTY_PROC,
- //Backwards, Normalization, CaseLevel,
- HandleSetting_EMPTY_PROC, HandleSetting_EMPTY_PROC, HandleSetting_EMPTY_PROC,
- //CaseFirst, HiraganaQ, NumericOrdering,
- HandleSetting_EMPTY_PROC, HandleSetting_EMPTY_PROC, HandleSetting_Import,
- //Reorder, MaxVariable Import
- HandleSetting_EMPTY_PROC,
- //SuppressContractions has a special handling see Process_SuppressContractions
- HandleSetting_EMPTY_PROC
- //Optimize
- );
- procedure HandleSetting(AItem : TCldrCollationItem; ASetting : PSettingRec);
- begin
- if not ASetting^.Understood then
- SETTING_HANDLERS[ASetting^.OptionValue](AItem,ASetting);
- end;
- procedure HandleSettings(AItem : TCldrCollationItem);
- var
- i, c : Integer;
- p : PSettingRec;
- begin
- c := Length(AItem.Settings);
- if (c < 1) then
- exit;
- p := @AItem.Settings[0];
- for i := 0 to c-1 do begin
- HandleSetting(AItem,p);
- Inc(p);
- end;
- end;
- function Process_SuppressContractions(
- ASetting : PSettingRec;
- AStatement : PReorderSequence
- ) : Boolean;
- var
- buffer : UTF8String;
- i : Integer;
- begin
- if (Length(ASetting^.Values) <> 1) then begin
- buffer := '';
- if (Length(ASetting^.Values) > 0) then begin
- for i := 0 to Length(ASetting^.Values)-1 do
- buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
- end;
- raise Exception.CreateFmt(sInvalidSuppressContractionsStatement,[buffer]);
- end;
- Result := (ParseDeletion(DOMString(ASetting^.Values[0]),AStatement) > 0);
- ASetting.Understood := Result;
- end;
- procedure ParseCollationItem2(
- ACollationNode : TDOMElement;
- AItem : TCldrCollationItem;
- AMode : TCldrParserMode
- );
- var
- statementList : TCldrCollationRuleArray;
- sal : Integer;//statement actual length
- procedure AddStatementToArray(AStatement : PReorderSequence);
- begin
- statementList[sal].Kind := TCldrCollationRuleKind.ReorderSequence;
- statementList[sal].Reorder.Assign(AStatement);
- Inc(sal);
- if (sal >= Length(statementList)) then
- SetLength(statementList,(sal*2));
- end;
- procedure AddImportToArray(AImport : TCldrImport);
- begin
- statementList[sal].Kind := TCldrCollationRuleKind.Import;
- statementList[sal].Import := AImport;
- Inc(sal);
- if (sal >= Length(statementList)) then
- SetLength(statementList,(sal*2));
- end;
- var
- n : TDOMNode;
- rulesElement : TDOMCDATASection;
- i, c, nextPos : Integer;
- parsedStatement : TParsedStatement;
- s : DOMString;
- u8 : UTF8String;
- buffer : PAnsiChar;
- lineCount : Integer;
- settingArray : TSettingRecArray;
- begin
- AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
- AItem.Alt := ACollationNode.GetAttribute(s_ALT);
- AItem.Settings := nil;
- AItem.Rules := nil;
- AItem.Mode := AMode;
- if (AMode = TCldrParserMode.FullParsing) then begin
- SetLength(statementList,15);
- sal := 0;
- n := ACollationNode.FindNode(s_CR);
- if (n <> nil) then begin
- n := (n as TDOMElement).FirstChild;
- rulesElement := n as TDOMCDATASection;
- s := rulesElement.Data;
- u8 := UTF8Encode(s);
- c := Length(u8);
- buffer := @u8[1];
- nextPos := 0;
- i := 0;
- lineCount := 0;
- Clear(parsedStatement);
- settingArray := AItem.Settings;
- while (i < c) do begin
- if not ParseStatement(buffer,i,c,@parsedStatement,nextPos,lineCount) then
- Break;
- if (parsedStatement.Kind = TStatementKind.Sequence) then begin
- AddStatementToArray(@parsedStatement.ReorderSequence);
- end else if (parsedStatement.Kind = TStatementKind.Setting) then begin
- if (parsedStatement.Setting.OptionValue = TSettingOption.SuppressContractions) then begin
- if Process_SuppressContractions(@parsedStatement.Setting,@parsedStatement.ReorderSequence) then
- AddStatementToArray(@parsedStatement.ReorderSequence);
- end;
- AddItem(settingArray,@parsedStatement.Setting);
- if (parsedStatement.Setting.OptionValue = TSettingOption.Import) then begin
- HandleSetting(AItem,@settingArray[Length(settingArray)-1]);
- AddImportToArray(AItem.Imports[AItem.Imports.Count-1]);
- end;
- end;
- i := nextPos;
- end;
- AItem.Settings := settingArray;
- if (Length(AItem.Settings) > 0) then
- HandleSettings(AItem);
- end;
- SetLength(statementList,sal);
- AItem.Rules := statementList;
- end;
- end;
- procedure ParseCollationDocument2(
- ADoc : TDOMDocument;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- n : TDOMNode;
- collationsElement : TDOMElement;
- i, c : Integer;
- item, tempItem : TCldrCollationItem;
- nl : TDOMNodeList;
- isnew : boolean;
- begin
- n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
- if (n = nil) then
- raise Exception.Create(sCollationsNodeNotFound);
- collationsElement := n as TDOMElement;
- //ACollation.Clear();
- ACollation.Mode := AMode;
- ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
- ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
- ACollation.DefaultType := EvaluateXPathStr('collations/defaultCollation',ADoc.DocumentElement);
- if collationsElement.HasChildNodes() then begin
- nl := collationsElement.ChildNodes;
- c := nl.Count;
- tempItem := TCldrCollationItem.Create();
- try
- item := nil;
- try
- for i := 0 to c - 1 do begin
- n := nl[i];
- if (n.NodeName = s_COLLATION) then begin
- tempItem.Clear();
- ParseCollationItem2((n as TDOMElement),tempItem,TCldrParserMode.HeaderParsing);
- item := ACollation.Find(tempItem.TypeName);
- isnew := (item = nil);
- if isnew then
- item := TCldrCollationItem.Create();
- if isnew or (item.Mode < AMode) then
- ParseCollationItem2((n as TDOMElement),item,AMode);
- if isnew then
- ACollation.Add(item);
- item := nil;
- end
- end;
- except
- FreeAndNil(item);
- raise;
- end;
- finally
- tempItem.Free();
- end;
- end;
- end;
- procedure ParseCollationDocument2(
- ADoc : TDOMDocument;
- ACollation : TCldrCollationItem;
- AType : string
- );
- var
- xv : TXPathVariable;
- begin
- xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
- try
- if (xv.AsNodeSet.Count = 0) then
- raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
- ACollation.Clear();
- ParseCollationItem2((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
- finally
- xv.Free();
- end
- end;
- function ReadXMLFile(f: TStream) : TXMLDocument;overload;
- var
- src : TXMLInputSource;
- parser: TDOMParser;
- begin
- src := TXMLInputSource.Create(f);
- parser := TDOMParser.Create();
- try
- parser.Options.IgnoreComments := True;
- parser.Parse(src, Result);
- finally
- src.Free();
- parser.Free;
- end;
- end;
- function ReadXMLFile(const AFilename: String) : TXMLDocument;overload;
- var
- FileStream: TStream;
- begin
- Result := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- Result := ReadXMLFile(FileStream);
- finally
- FileStream.Free;
- end;
- end;
- procedure ParseCollationDocument2(
- const AFileName : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- doc : TXMLDocument;
- begin
- doc := ReadXMLFile(AFileName);
- try
- ParseCollationDocument2(doc,ACollation,AMode);
- ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
- finally
- doc.Free();
- end;
- end;
- procedure ParseCollationDocument2(
- AStream : TStream;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- doc : TXMLDocument;
- begin
- doc := ReadXMLFile(AStream);
- try
- ParseCollationDocument2(doc,ACollation,AMode);
- finally
- doc.Free();
- end;
- end;
- procedure ParseCollationDocument2(
- const AFileName : string;
- ACollation : TCldrCollationItem;
- AType : string
- );
- var
- doc : TXMLDocument;
- begin
- doc := ReadXMLFile(AFileName);
- try
- ParseCollationDocument2(doc,ACollation,AType);
- finally
- doc.Free();
- end;
- end;
- procedure ParseCollationDocument2(
- AStream : TStream;
- ACollation : TCldrCollationItem;
- AType : string
- );
- var
- doc : TXMLDocument;
- begin
- doc := ReadXMLFile(AStream);
- try
- ParseCollationDocument2(doc,ACollation,AType);
- finally
- doc.Free();
- end;
- end;
- { TCldrCollationStreamLoader }
- procedure TCldrCollationStreamLoader.CheckContent(ALanguage: string);
- begin
- if not FileExists(ALanguage) then
- raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
- end;
- function TCldrCollationStreamLoader.IndexOf(ALanguage: string): Integer;
- var
- i : Integer;
- begin
- for i := Low(FLanguages) to High(FLanguages) do begin
- if (FLanguages[i] = ALanguage) then begin
- Result := i;
- exit;
- end;
- end;
- Result := -1;
- end;
- procedure TCldrCollationStreamLoader.LoadCollation(
- const ALanguage : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- i : Integer;
- locStream : TStream;
- begin
- i := IndexOf(ALanguage);
- if (i < 0) then
- CheckContent(ALanguage);
- locStream := FStreams[i];
- locStream.Position := 0;
- ParseCollationDocument2(locStream,ACollation,AMode);
- end;
- procedure TCldrCollationStreamLoader.LoadCollationType(
- const ALanguage,
- ATypeName : string;
- AType : TCldrCollationItem
- );
- var
- i : Integer;
- locStream : TStream;
- begin
- i := IndexOf(ALanguage);
- if (i < 0) then
- CheckContent(ALanguage);
- locStream := FStreams[i];
- locStream.Position := 0;
- ParseCollationDocument2(locStream,AType,ATypeName);
- end;
- constructor TCldrCollationStreamLoader.Create(
- const ALanguages : array of string;
- const AStreams : array of TStream
- );
- var
- c, i : Integer;
- begin
- c := Length(ALanguages);
- if (Length(AStreams) < c) then
- c := Length(AStreams);
- SetLength(FLanguages,c);
- SetLength(FStreams,c);
- for i := Low(ALanguages) to High(ALanguages) do begin
- FLanguages[i] := ALanguages[i];
- FStreams[i] := AStreams[i];
- end;
- end;
- destructor TCldrCollationStreamLoader.Destroy();
- var
- i : Integer;
- begin
- for i := Low(FStreams) to High(FStreams) do
- FreeAndNil(FStreams[i]);
- end;
- { TCldrCollationFileLoader }
- procedure TCldrCollationFileLoader.SetPath(APath: string);
- var
- s : string;
- begin
- if (APath = '') then
- s := ''
- else
- s := IncludeTrailingPathDelimiter(APath);
- if (s <> FPath) then
- FPath := s;
- end;
- function TCldrCollationFileLoader.BuildFileName(ALanguage: string): string;
- begin
- Result := Format('%s%s.xml',[FPath,ALanguage]);
- end;
- procedure TCldrCollationFileLoader.CheckFile(AFileName: string);
- begin
- if not FileExists(AFileName) then
- raise EFOpenError.CreateFmt(SFOpenError,[AFileName]);
- end;
- procedure TCldrCollationFileLoader.LoadCollation(
- const ALanguage : string;
- ACollation : TCldrCollation;
- AMode : TCldrParserMode
- );
- var
- locFileName : string;
- begin
- locFileName := BuildFileName(ALanguage);
- CheckFile(locFileName);
- //ACollation.Clear();
- ParseCollationDocument2(locFileName,ACollation,AMode);
- end;
- procedure TCldrCollationFileLoader.LoadCollationType(
- const ALanguage,
- ATypeName : string;
- AType : TCldrCollationItem
- );
- var
- locFileName : string;
- begin
- locFileName := BuildFileName(ALanguage);
- CheckFile(locFileName);
- //AType.Clear();
- ParseCollationDocument2(locFileName,AType,ATypeName);
- end;
- constructor TCldrCollationFileLoader.Create(APath: string);
- begin
- SetPath(APath);
- end;
- end.
|