Browse Source

utils: patch to the Unicode utils from Inoussa:
The CLDR parser now fully parses only the selected
collation type; the other types are read in header-only
mode. This should significantly reduce memory consumption.

git-svn-id: trunk@23883 -

paul 12 years ago
parent
commit
f12a0b7ece
3 changed files with 131 additions and 42 deletions
  1. 16 0
      utils/unicode/cldrhelper.pas
  2. 9 5
      utils/unicode/cldrparser.lpr
  3. 106 37
      utils/unicode/cldrxml.pas

+ 16 - 0
utils/unicode/cldrhelper.pas

@@ -144,6 +144,8 @@ type
 
 
   TCldrCollation = class;
   TCldrCollation = class;
 
 
+  { TCldrCollationItem }
+
   TCldrCollationItem = class
   TCldrCollationItem = class
   private
   private
     FBackwards: Boolean;
     FBackwards: Boolean;
@@ -153,6 +155,7 @@ type
     FRules: TReorderSequenceArray;
     FRules: TReorderSequenceArray;
     FTypeName: string;
     FTypeName: string;
   public
   public
+    procedure Clear();
     property Parent : TCldrCollation read FParent;
     property Parent : TCldrCollation read FParent;
     property TypeName : string read FTypeName write FTypeName;
     property TypeName : string read FTypeName write FTypeName;
     property Base : string read FBase write FBase;
     property Base : string read FBase write FBase;
@@ -187,6 +190,8 @@ type
     property Items[Index : Integer] : TCldrCollationItem read GetItem;
     property Items[Index : Integer] : TCldrCollationItem read GetItem;
   end;
   end;
 
 
+  TCldrParserMode = (HeaderParsing, FullParsing); { HeaderParsing: ParseCollationItem leaves Rules empty; FullParsing: the rule statements are parsed as well }
+
   function ComputeWeigths(
   function ComputeWeigths(
     const AData        : PReorderUnit;
     const AData        : PReorderUnit;
     const ADataLen     : Integer;
     const ADataLen     : Integer;
@@ -1104,6 +1109,17 @@ begin
   Result := locNotFound;
   Result := locNotFound;
 end;
 end;
 
 
+{ TCldrCollationItem }
+
+procedure TCldrCollationItem.Clear(); { Resets FBackwards, FBase, FChangedFields, FRules and FTypeName to their empty values }
+begin
+  FBackwards := False;
+  FBase := '';
+  FChangedFields := [];
+  SetLength(FRules,0); { drops any previously stored rule sequences }
+  FTypeName := '';
+end; { FParent is not assigned here, so it keeps its current value }
+
 { TCldrCollation }
 { TCldrCollation }
 
 
 function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;
 function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;

+ 9 - 5
utils/unicode/cldrparser.lpr

@@ -102,6 +102,7 @@ var
   i , c: Integer;
   i , c: Integer;
   collation : TCldrCollation;
   collation : TCldrCollation;
   dataPath, outputPath : string;
   dataPath, outputPath : string;
+  collationItem : TCldrCollationItem;
 begin
 begin
 {$ifdef test_suite}
 {$ifdef test_suite}
   exec_tests();
   exec_tests();
@@ -150,17 +151,20 @@ begin
   endianStream := nil;
   endianStream := nil;
   collation := TCldrCollation.Create();
   collation := TCldrCollation.Create();
   try
   try
-    ParseCollationDocument(collationFileName,collation);
+    ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
     WriteLn(Format('  Collation Count = %d',[collation.ItemCount]));
     WriteLn(Format('  Collation Count = %d',[collation.ItemCount]));
     if (collation.ItemCount = 0) then begin
     if (collation.ItemCount = 0) then begin
       WriteLn('No collation in this file.');
       WriteLn('No collation in this file.');
     end else begin
     end else begin
       for i := 0 to collation.ItemCount - 1 do
       for i := 0 to collation.ItemCount - 1 do
-        WriteLn(Format('  Item[%d] = %d "resets"; Type = %s',[i, Length(collation.Items[i].Rules),collation.Items[i].TypeName]));
-      if (collation.Find(collationTypeName) = nil) then
+        WriteLn(Format('  Item[%d] = (Type = %s)',[i, collation.Items[i].TypeName]));
+      collationItem := collation.Find(collationTypeName);
+      if (collationItem = nil) then begin
         collationTypeName := FindCollationDefaultItemName(collation);
         collationTypeName := FindCollationDefaultItemName(collation);
-      WriteLn('Collation Item Name : ',collationTypeName);
-
+        collationItem := collation.Find(collationTypeName);
+      end;
+      WriteLn(Format('Parsing Collation Item "%s" ...',[collationTypeName]));
+      ParseCollationDocument(collationFileName,collationItem,collationTypeName);
 
 
       s := dataPath + 'UCA_Rules_SHORT.xml';
       s := dataPath + 'UCA_Rules_SHORT.xml';
       WriteLn;
       WriteLn;

+ 106 - 37
utils/unicode/cldrxml.pas

@@ -32,13 +32,33 @@ uses
   procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
   procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
   procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
   procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
 
 
-  procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
-  procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
+  procedure ParseCollationDocument(
+    ADoc       : TDOMDocument;
+    ACollation : TCldrCollation;
+    AMode      : TCldrParserMode
+  );overload;
+  procedure ParseCollationDocument(
+    const AFileName  : string;
+          ACollation : TCldrCollation;
+          AMode      : TCldrParserMode
+  );overload;
+
+  procedure ParseCollationDocument(
+    const AFileName  : string;
+          ACollation : TCldrCollationItem;
+          AType      : string
+  );overload;
+  procedure ParseCollationDocument(
+    ADoc       : TDOMDocument;
+    ACollation : TCldrCollationItem;
+    AType      : string
+  );overload;
 
 
 resourcestring
 resourcestring
   sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
   sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
   sCodePointExpected = 'Code Point node expected as child at this position "%d".';
   sCodePointExpected = 'Code Point node expected as child at this position "%d".';
   sCollationsNodeNotFound = '"collations" node not found.';
   sCollationsNodeNotFound = '"collations" node not found.';
+  sCollationTypeNotFound = 'collation "Type" not found : "%s".';
   sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
   sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
   sInvalidResetClause = 'Invalid "Reset" clause.';
   sInvalidResetClause = 'Invalid "Reset" clause.';
   sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
   sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
@@ -500,7 +520,11 @@ begin
   SetLength(r,0);
   SetLength(r,0);
 end;
 end;
 
 
-procedure ParseCollationItem(ACollationNode : TDOMElement; AItem : TCldrCollationItem);
+procedure ParseCollationItem(
+  ACollationNode : TDOMElement;
+  AItem          : TCldrCollationItem;
+  AMode          : TCldrParserMode
+);
 var
 var
   n : TDOMNode;
   n : TDOMNode;
   rulesElement : TDOMElement;
   rulesElement : TDOMElement;
@@ -515,43 +539,49 @@ begin
   AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
   AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
   if AItem.Backwards then
   if AItem.Backwards then
     AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
     AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
-
-  SetLength(statementList,15);
-  sal := 0;
-  statement := @statementList[0];
-  s := EvaluateXPathStr('suppress_contractions',ACollationNode);
-  if (s <> '') then begin
-    if (ParseDeletion(s,statement) > 0) then begin
-      Inc(sal);
-      Inc(statement);
-    end else begin
-      statement^.Clear();
+  AItem.Rules := nil;
+  if (AMode = TCldrParserMode.FullParsing) then begin
+    SetLength(statementList,15);
+    sal := 0;
+    statement := @statementList[0];
+    s := EvaluateXPathStr('suppress_contractions',ACollationNode);
+    if (s <> '') then begin
+      if (ParseDeletion(s,statement) > 0) then begin
+        Inc(sal);
+        Inc(statement);
+      end else begin
+        statement^.Clear();
+      end;
     end;
     end;
-  end;
-  n := ACollationNode.FindNode(s_RULES);
-  if (n <> nil) then begin
-    rulesElement := n as TDOMElement;
-    c := rulesElement.ChildNodes.Count;
-    nextPos := 0;
-    i := 0;
-    while (i < c) do begin
-      statement^.Clear();
-      if not ParseStatement(rulesElement,i,statement,nextPos) then
-        Break;
-      i := nextPos;
-      Inc(statement);
-      Inc(sal);
-      if (sal >= Length(statementList)) then begin
-        SetLength(statementList,(sal*2));
-        statement := @statementList[(sal-1)];
+    n := ACollationNode.FindNode(s_RULES);
+    if (n <> nil) then begin
+      rulesElement := n as TDOMElement;
+      c := rulesElement.ChildNodes.Count;
+      nextPos := 0;
+      i := 0;
+      while (i < c) do begin
+        statement^.Clear();
+        if not ParseStatement(rulesElement,i,statement,nextPos) then
+          Break;
+        i := nextPos;
+        Inc(statement);
+        Inc(sal);
+        if (sal >= Length(statementList)) then begin
+          SetLength(statementList,(sal*2));
+          statement := @statementList[(sal-1)];
+        end;
       end;
       end;
     end;
     end;
+    SetLength(statementList,sal);
+    AItem.Rules := statementList;
   end;
   end;
-  SetLength(statementList,sal);
-  AItem.Rules := statementList;
 end;
 end;
 
 
-procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
+procedure ParseCollationDocument(
+  ADoc       : TDOMDocument;
+  ACollation : TCldrCollation;
+  AMode      : TCldrParserMode
+);
 var
 var
   rulesNodes, n : TDOMNode;
   rulesNodes, n : TDOMNode;
   collationsElement, rulesElement : TDOMElement;
   collationsElement, rulesElement : TDOMElement;
@@ -576,7 +606,7 @@ begin
         n := nl[i];
         n := nl[i];
         if (n.NodeName = s_COLLATION) then begin
         if (n.NodeName = s_COLLATION) then begin
           item := TCldrCollationItem.Create();
           item := TCldrCollationItem.Create();
-          ParseCollationItem((n as TDOMElement),item);
+          ParseCollationItem((n as TDOMElement),item,AMode);
           ACollation.Add(item);
           ACollation.Add(item);
           item := nil;
           item := nil;
         end
         end
@@ -588,6 +618,25 @@ begin
   end;
   end;
 end;
 end;
 
 
+procedure ParseCollationDocument( { Fully parses the "collation" element whose "type" attribute equals AType into ACollation }
+  ADoc       : TDOMDocument; { document to search; the XPath is evaluated from its DocumentElement }
+  ACollation : TCldrCollationItem; { item to fill; it is cleared before being parsed into }
+  AType      : string { value compared against the "type" attribute of each collation element }
+);
+var
+  xv : TXPathVariable;
+begin
+  xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement); { NOTE(review): QuotedStr applies Pascal quoting (doubles embedded apostrophes), not XPath escaping - confirm AType never contains a quote }
+  try
+    if (xv.AsNodeSet.Count = 0) then
+      raise Exception.CreateFmt(sCollationTypeNotFound,[AType]); { no matching collation element: report the requested type }
+    ACollation.Clear(); { runs only after a match is known, so a failed lookup leaves the item as it was }
+    ParseCollationItem((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing); { only the first matching node is used }
+  finally
+    xv.Free(); { the XPath result is released on both the normal and the exception path }
+  end
+end;
+
 function ReadXMLFile(f: TStream) : TXMLDocument;
 function ReadXMLFile(f: TStream) : TXMLDocument;
 var
 var
   src : TXMLInputSource;
   src : TXMLInputSource;
@@ -618,17 +667,37 @@ begin
   end;
   end;
 end;
 end;
 
 
-procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
+procedure ParseCollationDocument(
+  const AFileName  : string;
+        ACollation : TCldrCollation;
+        AMode      : TCldrParserMode
+);
 var
 var
   doc : TXMLDocument;
   doc : TXMLDocument;
 begin
 begin
   doc := ReadXMLFile(AFileName);
   doc := ReadXMLFile(AFileName);
   try
   try
-    ParseCollationDocument(doc,ACollation);
+    ParseCollationDocument(doc,ACollation,AMode);
     ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
     ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
   finally
   finally
     doc.Free();
     doc.Free();
   end;
   end;
 end;
 end;
 
 
+procedure ParseCollationDocument( { Reads the XML file AFileName and parses the collation of type AType into ACollation }
+  const AFileName  : string; { file passed to ReadXMLFile }
+        ACollation : TCldrCollationItem; { item handed on to the TDOMDocument overload }
+        AType      : string { collation type name handed on to the TDOMDocument overload }
+);
+var
+  doc : TXMLDocument;
+begin
+  doc := ReadXMLFile(AFileName);
+  try
+    ParseCollationDocument(doc,ACollation,AType); { delegates to the overload that takes an already loaded document }
+  finally
+    doc.Free(); { the loaded document is released even if parsing raises }
+  end;
+end;
+
 end.
 end.