浏览代码

rtl, utils: apply patch of Inoussa:

This patch implements loading of collations at runtime. This reduces the final executable's size, as the collation data are now stored externally. Note that it requires the external collation files to be shipped and the program to load the collations it needs using the "LoadCollation"/"RegisterCollation" procedure(s).

The external collation files are produced by "cldrparser" (while producing the static files). The external file of the root collation "ducet" is produced by "unihelper".

It is important to note that these files are endian specific :
 * collation_*_be.bco for big endian systems
 * collation_*_le.bco for little endian system.

The root collation should at least be registered, either statically by using the "unicodeducet" unit or dynamically by making a call such as RegisterCollation(<collation dir>,'ducet').
It is possible, in the same application, to make use of both static and dynamic collations.

git-svn-id: trunk@25295 -
paul 12 年之前
父节点
当前提交
f285948fcb
共有 5 个文件被更改,包括 524 次插入33 次删除
  1. 261 16
      rtl/objpas/unicodedata.pas
  2. 55 12
      utils/unicode/cldrhelper.pas
  3. 55 2
      utils/unicode/cldrparser.lpr
  4. 113 2
      utils/unicode/helper.pas
  5. 40 1
      utils/unicode/unihelper.lpr

+ 261 - 16
rtl/objpas/unicodedata.pas

@@ -274,6 +274,7 @@ type
     Props              : PUCA_PropItemRec;
     Props              : PUCA_PropItemRec;
     VariableLowLimit   : Word;
     VariableLowLimit   : Word;
     VariableHighLimit  : Word;
     VariableHighLimit  : Word;
+    Dynamic            : Boolean;
   public
   public
     function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
     function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
   end;
   end;
@@ -320,8 +321,13 @@ type
   function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
   function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
   function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
   function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
 
 
-  function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
+  function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
+  function RegisterCollation(
+    const ADirectory,
+          ALanguage : string
+  ) : Boolean;overload;
   function UnregisterCollation(const AName : ansistring): Boolean;
   function UnregisterCollation(const AName : ansistring): Boolean;
+  procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
   function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
   function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
   function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
   function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
   function GetCollationCount() : Integer;
   function GetCollationCount() : Integer;
@@ -330,6 +336,29 @@ type
     const ABaseName      : ansistring;
     const ABaseName      : ansistring;
     const AChangedFields : TCollationFields
     const AChangedFields : TCollationFields
   );
   );
+  function LoadCollation(
+    const AData       : Pointer;
+    const ADataLength : Integer
+  ) : PUCA_DataBook;overload;
+  function LoadCollation(const AFileName : string) : PUCA_DataBook;overload;
+  function LoadCollation(
+    const ADirectory,
+          ALanguage : string
+  ) : PUCA_DataBook;overload;
+  procedure FreeCollation(AItem : PUCA_DataBook);
+
+type
+  // Byte order of a serialized collation file.
+  TEndianKind = (Little, Big);
+const
+  // File-name suffix for each byte order; used when building the
+  // 'collation_<language>_<suffix>.bco' file name.
+  ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
+// ENDIAN_NATIVE is the byte order of the compilation target,
+// ENDIAN_NON_NATIVE the opposite one.
+{$IFDEF ENDIAN_LITTLE}
+  ENDIAN_NATIVE     = TEndianKind.Little;
+  ENDIAN_NON_NATIVE = TEndianKind.Big;
+{$ENDIF ENDIAN_LITTLE}
+{$IFDEF ENDIAN_BIG}
+  ENDIAN_NATIVE = TEndianKind.Big;
+  ENDIAN_NON_NATIVE = TEndianKind.Little;
+{$ENDIF ENDIAN_BIG}
 
 
 resourcestring
 resourcestring
   SCollationNotFound = 'Collation not found : "%s".';
   SCollationNotFound = 'Collation not found : "%s".';
@@ -535,6 +564,21 @@ begin
   Result := a <= Cardinal(b);
   Result := a <= Cardinal(b);
 end;
 end;
 
 
+type
+  // Position of a bit inside a byte (0 is the least significant bit).
+  TBitOrder = 0..7;
+// Returns True when bit ABit of AData is set.
+function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
+begin
+  Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
+end;
+
+// Sets (AValue = True) or clears (AValue = False) bit ABit of AData,
+// leaving the other bits unchanged.
+procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
+begin
+  if AValue then
+    AData := AData or (1 shl (ABit mod 8))
+  else
+    AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
+end;
+
 var
 var
   CollationTable : array of PUCA_DataBook;
   CollationTable : array of PUCA_DataBook;
 function IndexOfCollation(const AName : string) : Integer;
 function IndexOfCollation(const AName : string) : Integer;
@@ -565,6 +609,23 @@ begin
   end;
   end;
 end;
 end;
 
 
+// Loads the collation for ALanguage from ADirectory and registers it.
+// Returns False when the collation could not be loaded or when the
+// registration was refused; in the latter case (and when the registration
+// raises) the freshly loaded collation is released again.
+function RegisterCollation(const ADirectory, ALanguage : string) : Boolean;
+var
+  cl : PUCA_DataBook;
+begin
+  cl := LoadCollation(ADirectory,ALanguage);
+  if (cl = nil) then
+    exit(False);
+  try
+    Result := RegisterCollation(cl);
+  except
+    // Do not leak the loaded data if the registration raises.
+    FreeCollation(cl);
+    raise;
+  end;
+  if not Result then
+    FreeCollation(cl);
+end;
+
 function UnregisterCollation(const AName : ansistring): Boolean;
 function UnregisterCollation(const AName : ansistring): Boolean;
 var
 var
   i, c : Integer;
   i, c : Integer;
@@ -582,6 +643,21 @@ begin
   end;
   end;
 end;
 end;
 
 
+// Removes every collation from the registration table.
+//   AFreeDynamicCollations : when True, collations that were loaded at
+//     runtime (Dynamic = True) are also released with FreeCollation; when
+//     False they are only unregistered and remain owned by the caller.
+// Statically linked collations are never freed.
+procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
+var
+  i : Integer;
+  cl : PUCA_DataBook;
+begin
+  for i := Low(CollationTable) to High(CollationTable) do begin
+    cl := CollationTable[i];
+    // Clear the slot first so the table never holds a dangling pointer.
+    CollationTable[i] := nil;
+    if AFreeDynamicCollations and (cl <> nil) and cl^.Dynamic then
+      FreeCollation(cl);
+  end;
+  SetLength(CollationTable,0);
+end;
+
 function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
 function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
 var
 var
   i : Integer;
   i : Integer;
@@ -632,6 +708,190 @@ begin
     p^.VariableLowLimit := base^.VariableHighLimit;
     p^.VariableLowLimit := base^.VariableHighLimit;
 end;
 end;
 
 
+type
+  // Header stored at the beginning of serialized collation data; it is the
+  // first thing LoadCollation reads. The *Length fields and PropCount are
+  // used as byte counts for the table data that follows the header.
+  // NOTE(review): the layout has to stay identical to the record of the same
+  // name declared in utils/unicode/helper.pas, which is used to write it.
+  TSerializedCollationHeader = packed record
+    Base               : TCollationName;
+    Version            : TCollationName;
+    CollationName      : TCollationName;
+    VariableWeight     : Byte;   // Ord() of a TUCA_VariableKind value
+    Backwards          : Byte;   // bits 0..3 map to Backwards[0..3]
+    BMP_Table1Length   : DWord;
+    BMP_Table2Length   : DWord;
+    OBMP_Table1Length  : DWord;
+    OBMP_Table2Length  : DWord;
+    PropCount          : DWord;
+    VariableLowLimit   : Word;
+    VariableHighLimit  : Word;
+    ChangedFields      : Byte;   // one bit per TCollationField ordinal
+  end;
+  PSerializedCollationHeader = ^TSerializedCollationHeader;
+
+// Releases a collation created by LoadCollation.
+// Does nothing when AItem is nil or when the collation is not flagged as
+// Dynamic, so it is safe to call on statically linked collations.
+procedure FreeCollation(AItem : PUCA_DataBook);
+var
+  h : PSerializedCollationHeader;
+begin
+  if (AItem = nil) or not(AItem^.Dynamic) then
+    exit;
+  // The serialized header lives in the same memory block, right after the
+  // TUCA_DataBook record; it still holds the sizes of the tables.
+  h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
+  if (AItem^.BMP_Table1 <> nil) then
+    FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
+  if (AItem^.BMP_Table2 <> nil) then
+    FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
+  if (AItem^.OBMP_Table1 <> nil) then
+    FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
+  if (AItem^.OBMP_Table2 <> nil) then
+    FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
+  if (AItem^.Props <> nil) then
+    FreeMem(AItem^.Props,h^.PropCount);
+  FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
+end;
+
+{ Builds a collation from a serialized image held in memory.
+    AData       : start of the serialized data (header followed by tables).
+    ADataLength : number of bytes available at AData.
+  Returns the new collation, or nil when the data are missing or truncated.
+  The returned collation is flagged as Dynamic and has to be released with
+  FreeCollation. Exceptions raised while preparing the collation are
+  propagated after the partially built collation has been released. }
+function LoadCollation(
+  const AData       : Pointer;
+  const ADataLength : Integer
+) : PUCA_DataBook;
+var
+  dataPointer : PByte;
+  readedLength : LongInt;
+
+  // True when at least ALength unread bytes remain in the input buffer.
+  function CanRead(const ALength : DWord) : Boolean;
+  begin
+    Result := (ALength <= DWord(ADataLength - readedLength));
+  end;
+
+  // Copies the next ALength bytes of the input into ADest and advances the
+  // read position; returns False (and copies nothing) when the input is
+  // too short.
+  function ReadBuffer(ADest : Pointer; ALength : DWord) : Boolean;
+  begin
+    Result := CanRead(ALength);
+    if not Result then
+      exit;
+    Move(dataPointer^,ADest^,ALength);
+    Inc(dataPointer,ALength);
+    readedLength := readedLength + LongInt(ALength);
+  end;
+
+var
+  r : PUCA_DataBook;
+  h : PSerializedCollationHeader;
+  cfs : TCollationFields;
+  i : Integer;
+  baseName : TCollationName;
+  loaded : Boolean;
+begin
+  Result := nil;
+  if (AData = nil) or (ADataLength <= 0) then
+    exit;
+  readedLength := 0;
+  dataPointer := AData;
+  // The book and a copy of the serialized header share one zero-filled
+  // block; FreeCollation relies on this layout to find the table sizes.
+  r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
+  // Flag the book right away : FreeCollation ignores non dynamic books, so
+  // without this the cleanup below would silently leak the memory.
+  r^.Dynamic := True;
+  loaded := False;
+  try
+    h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
+    if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
+      exit;
+    r^.Version := h^.Version;
+    r^.CollationName := h^.CollationName;
+    r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
+    r^.Backwards[0] := IsBitON(h^.Backwards,0);
+    r^.Backwards[1] := IsBitON(h^.Backwards,1);
+    r^.Backwards[2] := IsBitON(h^.Backwards,2);
+    r^.Backwards[3] := IsBitON(h^.Backwards,3);
+    // Each length is checked against the remaining input before allocating,
+    // so a corrupted header cannot trigger an oversized allocation.
+    if (h^.BMP_Table1Length > 0) then begin
+      if not CanRead(h^.BMP_Table1Length) then
+        exit;
+      r^.BMP_Table1 := GetMem(h^.BMP_Table1Length);
+      if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
+        exit;
+    end;
+    if (h^.BMP_Table2Length > 0) then begin
+      if not CanRead(h^.BMP_Table2Length) then
+        exit;
+      r^.BMP_Table2 := GetMem(h^.BMP_Table2Length);
+      if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
+        exit;
+    end;
+    if (h^.OBMP_Table1Length > 0) then begin
+      if not CanRead(h^.OBMP_Table1Length) then
+        exit;
+      r^.OBMP_Table1 := GetMem(h^.OBMP_Table1Length);
+      if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
+        exit;
+    end;
+    if (h^.OBMP_Table2Length > 0) then begin
+      if not CanRead(h^.OBMP_Table2Length) then
+        exit;
+      r^.OBMP_Table2 := GetMem(h^.OBMP_Table2Length);
+      if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
+        exit;
+    end;
+    r^.PropCount := h^.PropCount;
+    if (h^.PropCount > 0) then begin
+      if not CanRead(h^.PropCount) then
+        exit;
+      r^.Props := GetMem(h^.PropCount);
+      if not ReadBuffer(r^.Props,h^.PropCount) then
+        exit;
+    end;
+    r^.VariableLowLimit := h^.VariableLowLimit;
+    r^.VariableHighLimit := h^.VariableHighLimit;
+
+    // Rebuild the set of changed fields from its bit mask.
+    cfs := [];
+    for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
+      if IsBitON(h^.ChangedFields,i) then
+        cfs := cfs + [TCollationField(i)];
+    end;
+    // A collation without explicit base derives from the root collation,
+    // except the root collation itself which has no base.
+    if (h^.Base <> '') then
+      baseName := h^.Base
+    else if (h^.CollationName <> ROOT_COLLATION_NAME) then
+      baseName := ROOT_COLLATION_NAME
+    else
+      baseName := '';
+    if (baseName <> '') then
+      PrepareCollation(r,baseName,cfs);
+    loaded := True;
+    Result := r;
+  finally
+    // Reached on success, on early exit (truncated data) and on exception.
+    if not loaded then
+      FreeCollation(r);
+  end;
+end;
+
+{$PUSH}
+{ Loads a collation from the serialized collation file AFileName.
+  Returns nil when the name is empty, the file cannot be opened or read, or
+  the file is smaller than a serialized collation header. I/O checking is
+  disabled locally, so I/O failures are reported through the nil result
+  instead of a runtime error. }
+function LoadCollation(const AFileName : string) : PUCA_DataBook;
+const
+  BLOCK_SIZE = 16*1024;
+var
+  f : File of Byte;
+  locSize, locReaded, c : LongInt;
+  locBuffer : PByte;
+  locBlockSize : LongInt;
+  locOldFileMode : Byte;
+begin
+  Result := nil;
+{$I-}
+  if (AFileName = '') then
+    exit;
+  Assign(f,AFileName);
+  // Open read-only : with the default FileMode, Reset asks for read/write
+  // access and fails on read-only files or media.
+  locOldFileMode := FileMode;
+  FileMode := 0;
+  Reset(f);
+  FileMode := locOldFileMode;
+  // Check before entering the try block : closing a file that was never
+  // opened would leave a pending I/O error code behind.
+  if (IOResult <> 0) then
+    exit;
+  try
+    locSize := FileSize(f);
+    if (locSize < SizeOf(TSerializedCollationHeader)) then
+      exit;
+    locBuffer := GetMem(locSize);
+    try
+      // Read the whole file into memory, BLOCK_SIZE bytes at a time.
+      locBlockSize := BLOCK_SIZE;
+      locReaded := 0;
+      while (locReaded < locSize) do begin
+        if (locBlockSize > (locSize-locReaded)) then
+          locBlockSize := locSize-locReaded;
+        BlockRead(f,locBuffer[locReaded],locBlockSize,c);
+        if (IOResult <> 0) or (c <= 0) then
+          exit;
+        locReaded := locReaded + c;
+      end;
+      Result := LoadCollation(locBuffer,locSize);
+    finally
+      FreeMem(locBuffer,locSize);
+    end;
+  finally
+    Close(f);
+  end;
+end;
+{$POP}
+
+// Loads the collation of ALanguage from the directory ADirectory.
+// The file name is built as
+//   <ADirectory>/collation_<ALanguage>_<native endian suffix>.bco
+// and handed to the file based LoadCollation overload, whose result is
+// returned unchanged.
+function LoadCollation(const ADirectory, ALanguage : string) : PUCA_DataBook;
+var
+  fileName : string;
+begin
+  fileName := ADirectory;
+  if (fileName <> '') then begin
+    // Make sure the directory ends with a separator before appending.
+    if (fileName[Length(fileName)] <> DirectorySeparator) then
+      fileName := fileName + DirectorySeparator;
+  end;
+  fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
+  Result := LoadCollation(fileName);
+end;
+
 {$INCLUDE unicodedata.inc}
 {$INCLUDE unicodedata.inc}
 {$IFDEF ENDIAN_LITTLE}
 {$IFDEF ENDIAN_LITTLE}
   {$INCLUDE unicodedata_le.inc}
   {$INCLUDE unicodedata_le.inc}
@@ -1026,21 +1286,6 @@ begin
   end;
   end;
 end;
 end;
 
 
-type
-  TBitOrder = 0..7;
-function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
-begin
-  Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
-end;
-
-procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
-begin
-  if AValue then
-    AData := AData or (1 shl (ABit mod 8))
-  else
-    AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
-end;
-
 { TUCA_PropItemContextTreeNodeRec }
 { TUCA_PropItemContextTreeNodeRec }
 
 
 function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
 function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;

+ 55 - 12
utils/unicode/cldrhelper.pas

@@ -208,14 +208,16 @@ type
   ) : Integer;
   ) : Integer;
   function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
   function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
   procedure GenerateCdlrCollation(
   procedure GenerateCdlrCollation(
-    ACollation           : TCldrCollation;
-    AItemName            : string;
-    AStoreName           : string;
+    ACollation                : TCldrCollation;
+    AItemName                 : string;
+    AStoreName                : string;
     AStream,
     AStream,
     ANativeEndianStream,
     ANativeEndianStream,
-    AOtherEndianStream   : TStream;
-    ARootChars           : TOrderedCharacters;
-    ARootWeigths         : TUCA_LineRecArray
+    AOtherEndianStream,
+    ABinaryNativeEndianStream,
+    ABinaryOtherEndianStream  : TStream;
+    ARootChars                : TOrderedCharacters;
+    ARootWeigths              : TUCA_LineRecArray
   );
   );
 
 
   procedure GenerateUCA_CLDR_Head(
   procedure GenerateUCA_CLDR_Head(
@@ -1635,14 +1637,16 @@ begin
 end;
 end;
 
 
 procedure GenerateCdlrCollation(
 procedure GenerateCdlrCollation(
-  ACollation           : TCldrCollation;
-  AItemName            : string;
-  AStoreName           : string;
+  ACollation                : TCldrCollation;
+  AItemName                 : string;
+  AStoreName                : string;
   AStream,
   AStream,
   ANativeEndianStream,
   ANativeEndianStream,
-  AOtherEndianStream   : TStream;
-  ARootChars           : TOrderedCharacters;
-  ARootWeigths         : TUCA_LineRecArray
+  AOtherEndianStream,
+  ABinaryNativeEndianStream,
+  ABinaryOtherEndianStream  : TStream;
+  ARootChars                : TOrderedCharacters;
+  ARootWeigths              : TUCA_LineRecArray
 );
 );
 
 
   procedure AddLine(const ALine : ansistring; ADestStream : TStream);
   procedure AddLine(const ALine : ansistring; ADestStream : TStream);
@@ -1665,6 +1669,8 @@ var
   ucaoSecondTable  : TucaOBmpSecondTable;
   ucaoSecondTable  : TucaOBmpSecondTable;
   locHasProps : Boolean;
   locHasProps : Boolean;
   s : string;
   s : string;
+  serializedHeader : TSerializedCollationHeader;
+  e : TCollationField;
 begin
 begin
   locItem := ACollation.Find(AItemName);
   locItem := ACollation.Find(AItemName);
   if (locItem = nil) then
   if (locItem = nil) then
@@ -1707,6 +1713,43 @@ begin
       AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream);
       AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream);
     end;
     end;
     GenerateUCA_CLDR_Registration(AStream,@locUcaBook);
     GenerateUCA_CLDR_Registration(AStream,@locUcaBook);
+
+    FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
+    serializedHeader.Base := locItem.Base;
+    serializedHeader.Version := ACollation.Version;
+    serializedHeader.CollationName := ACollation.Language;
+    serializedHeader.VariableWeight := Ord(locUcaBook.VariableWeight);
+    SetBit(serializedHeader.Backwards,0,locUcaBook.Backwards[0]);
+    SetBit(serializedHeader.Backwards,1,locUcaBook.Backwards[1]);
+    SetBit(serializedHeader.Backwards,2,locUcaBook.Backwards[2]);
+    SetBit(serializedHeader.Backwards,3,locUcaBook.Backwards[3]);
+    if locHasProps then begin
+      serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
+      serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
+                                           (Length(ucaSecondTable) * SizeOf(UInt24));
+      serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
+      serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
+                                           (Length(ucaoSecondTable) * SizeOf(UInt24));
+      serializedHeader.PropCount := locUcaProps^.ItemSize;
+      serializedHeader.VariableLowLimit := locUcaProps^.VariableLowLimit;
+      serializedHeader.VariableHighLimit := locUcaProps^.VariableHighLimit;
+    end else begin
+      serializedHeader.VariableLowLimit := High(Word);
+      serializedHeader.VariableHighLimit := 0;
+    end;
+    serializedHeader.ChangedFields := 0;
+    for e := Low(TCollationField) to High(TCollationField) do begin
+      if (e in locItem.ChangedFields) then
+        SetBit(serializedHeader.ChangedFields,Ord(e),True);
+    end;
+    ABinaryNativeEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
+    ReverseRecordBytes(serializedHeader);
+    ABinaryOtherEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
+    if locHasProps then begin
+      GenerateBinaryUCA_BmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaFirstTable,ucaSecondTable);
+      GenerateBinaryUCA_OBmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaoFirstTable,ucaoSecondTable);
+      GenerateBinaryUCA_PropTable(ABinaryNativeEndianStream,ABinaryOtherEndianStream,locUcaProps);
+    end;
   finally
   finally
     locSequence.Clear();
     locSequence.Clear();
     FreeUcaBook(locUcaProps);
     FreeUcaBook(locUcaProps);

+ 55 - 2
utils/unicode/cldrparser.lpr

@@ -22,9 +22,10 @@
 program cldrparser;
 program cldrparser;
 
 
 {$mode objfpc}{$H+}
 {$mode objfpc}{$H+}
+{ $define WINCE_TEST}
 
 
 uses
 uses
-  SysUtils, classes, getopts,
+  SysUtils, classes, getopts,{$ifdef WINCE}StreamIO,{$endif}
   cldrhelper, helper, cldrtest, cldrxml, unicodeset;
   cldrhelper, helper, cldrtest, cldrxml, unicodeset;
 
 
 const
 const
@@ -66,6 +67,12 @@ var
   idx, k : Integer;
   idx, k : Integer;
   s : string;
   s : string;
 begin
 begin
+{$ifdef WINCE_TEST}
+  ADataDir := ExtractFilePath(ParamStr(0))+'data';
+  AOuputDir := ADataDir;
+  ACollationFileName := 'sv.xml';
+  exit(True);
+{$endif WINCE_TEST}
   if (ParamCount() = 0) then
   if (ParamCount() = 0) then
     exit(False);
     exit(False);
   Result := True;
   Result := True;
@@ -101,14 +108,41 @@ end;
 var
 var
   orderedChars : TOrderedCharacters;
   orderedChars : TOrderedCharacters;
   ucaBook : TUCA_DataBook;
   ucaBook : TUCA_DataBook;
-  stream, streamNE, streamOE : TMemoryStream;
+  stream, streamNE, streamOE, binaryStreamNE, binaryStreamOE : TMemoryStream;
   s, collationFileName, collationTypeName : string;
   s, collationFileName, collationTypeName : string;
   i , c: Integer;
   i , c: Integer;
   collation : TCldrCollation;
   collation : TCldrCollation;
   dataPath, outputPath : string;
   dataPath, outputPath : string;
   collationItem : TCldrCollationItem;
   collationItem : TCldrCollationItem;
   testSuiteFlag : Boolean;
   testSuiteFlag : Boolean;
+{$ifdef WINCE}
+  fs : TFileStream;
+{$endif WINCE}
 begin
 begin
+{$ifdef WINCE}
+  s := ExtractFilePath(ParamStr(0))+'cldr-log.txt';
+  DeleteFile(s);
+  fs := TFileStream.Create(s,fmCreate);
+  AssignStream(Output,fs);
+  Rewrite(Output);
+  s := ExtractFilePath(ParamStr(0))+'cldr-err.txt';
+  DeleteFile(s);
+  fs := TFileStream.Create(s,fmCreate);
+  AssignStream(ErrOutput,fs);
+  Rewrite(ErrOutput);
+{$endif WINCE}
+{$ifdef WINCE_TEST}
+  testSuiteFlag := True;
+  try
+    exec_tests();
+  except
+    on e : Exception do begin
+      WriteLn('Exception : '+e.Message);
+      raise;
+    end;
+  end;
+  exit;
+{$endif WINCE_TEST}
   dataPath := '';
   dataPath := '';
   outputPath := '';
   outputPath := '';
   collationFileName := '';
   collationFileName := '';
@@ -132,10 +166,12 @@ begin
     outputPath := dataPath
     outputPath := dataPath
   else
   else
     outputPath := IncludeTrailingPathDelimiter(outputPath);
     outputPath := IncludeTrailingPathDelimiter(outputPath);
+{$ifndef WINCE_TEST}
   if (ParamCount() = 0) then begin
   if (ParamCount() = 0) then begin
     WriteLn(SUsageText);
     WriteLn(SUsageText);
     Halt(1);
     Halt(1);
   end;
   end;
+{$endif WINCE_TEST}
   if not(
   if not(
        FileExists(dataPath+'UCA_Rules_SHORT.xml') and
        FileExists(dataPath+'UCA_Rules_SHORT.xml') and
        FileExists(dataPath+'allkeys.txt')
        FileExists(dataPath+'allkeys.txt')
@@ -155,6 +191,8 @@ begin
   stream := nil;
   stream := nil;
   streamNE := nil;
   streamNE := nil;
   streamOE := nil;
   streamOE := nil;
+  binaryStreamNE := nil;
+  binaryStreamOE := nil;
   collation := TCldrCollation.Create();
   collation := TCldrCollation.Create();
   try
   try
     ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
     ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
@@ -194,9 +232,12 @@ begin
       stream.Clear();
       stream.Clear();
       streamNE := TMemoryStream.Create();
       streamNE := TMemoryStream.Create();
       streamOE := TMemoryStream.Create();
       streamOE := TMemoryStream.Create();
+      binaryStreamNE := TMemoryStream.Create();
+      binaryStreamOE := TMemoryStream.Create();
       s := COLLATION_FILE_PREFIX + ChangeFileExt(LowerCase(ExtractFileName(collationFileName)),'.pas');
       s := COLLATION_FILE_PREFIX + ChangeFileExt(LowerCase(ExtractFileName(collationFileName)),'.pas');
       GenerateCdlrCollation(
       GenerateCdlrCollation(
         collation,collationTypeName,s,stream,streamNE,streamOE,
         collation,collationTypeName,s,stream,streamNE,streamOE,
+        binaryStreamNE,binaryStreamOE,
         orderedChars,ucaBook.Lines
         orderedChars,ucaBook.Lines
       );
       );
       stream.SaveToFile(ExtractFilePath(collationFileName)+s);
       stream.SaveToFile(ExtractFilePath(collationFileName)+s);
@@ -204,8 +245,20 @@ begin
         streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE));
         streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE));
         streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
         streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
       end;
       end;
+      if (binaryStreamNE.Size > 0) then begin
+        binaryStreamNE.SaveToFile(
+          ExtractFilePath(collationFileName) +
+          ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]]))
+        );
+        binaryStreamOE.SaveToFile(
+          ExtractFilePath(collationFileName) +
+          ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]]))
+        );
+      end;
     end;
     end;
   finally
   finally
+    binaryStreamOE.Free();
+    binaryStreamNE.Free();
     streamOE.Free();
     streamOE.Free();
     streamNE.Free();
     streamNE.Free();
     stream.Free();
     stream.Free();

+ 113 - 2
utils/unicode/helper.pas

@@ -487,11 +487,23 @@ const
     var   AFirstTable            : TucaBmpFirstTable;
     var   AFirstTable            : TucaBmpFirstTable;
     var   ASecondTable           : TucaBmpSecondTable
     var   ASecondTable           : TucaBmpSecondTable
   );
   );
+  procedure GenerateBinaryUCA_BmpTables(
+          ANativeEndianStream,
+          ANonNativeEndianStream : TStream;
+    var   AFirstTable            : TucaBmpFirstTable;
+    var   ASecondTable           : TucaBmpSecondTable
+  );
   procedure GenerateUCA_PropTable(
   procedure GenerateUCA_PropTable(
           ADest     : TStream;
           ADest     : TStream;
     const APropBook : PUCA_PropBook;
     const APropBook : PUCA_PropBook;
     const AEndian   : TEndianKind
     const AEndian   : TEndianKind
   );
   );
+  procedure GenerateBinaryUCA_PropTable(
+  // WARNING : files must be generated for each endianess (Little / Big)
+          ANativeEndianStream,
+          ANonNativeEndianStream : TStream;
+    const APropBook              : PUCA_PropBook
+  );
   procedure GenerateUCA_OBmpTables(
   procedure GenerateUCA_OBmpTables(
           AStream,
           AStream,
           ANativeEndianStream,
           ANativeEndianStream,
@@ -499,6 +511,12 @@ const
     var   AFirstTable            : TucaOBmpFirstTable;
     var   AFirstTable            : TucaOBmpFirstTable;
     var   ASecondTable           : TucaOBmpSecondTable
     var   ASecondTable           : TucaOBmpSecondTable
   );
   );
+  procedure GenerateBinaryUCA_OBmpTables(
+          ANativeEndianStream,
+          ANonNativeEndianStream : TStream;
+    var   AFirstTable            : TucaOBmpFirstTable;
+    var   ASecondTable           : TucaOBmpSecondTable
+  );
 
 
   procedure Parse_UnicodeData(
   procedure Parse_UnicodeData(
           ADataAStream   : TMemoryStream;
           ADataAStream   : TMemoryStream;
@@ -611,7 +629,6 @@ const
   ): PPropRec; inline;overload;
   ): PPropRec; inline;overload;
   procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
   procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
   function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
   function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
-//--------------------
 
 
 type
 type
   TBitOrder = 0..7;
   TBitOrder = 0..7;
@@ -640,6 +657,29 @@ type
     const ADataLen : Integer
     const ADataLen : Integer
   );
   );
 
 
+type
+  TCollationName = string[128];
+  TSerializedCollationHeader = packed record
+    Base               : TCollationName;
+    Version            : TCollationName;
+    CollationName      : TCollationName;
+    VariableWeight     : Byte;
+    Backwards          : Byte;
+    BMP_Table1Length   : DWord;
+    BMP_Table2Length   : DWord;
+    OBMP_Table1Length  : DWord;
+    OBMP_Table2Length  : DWord;
+    PropCount          : DWord;
+    VariableLowLimit   : Word;
+    VariableHighLimit  : Word;
+    ChangedFields      : Byte;
+  end;
+  PSerializedCollationHeader = ^TSerializedCollationHeader;
+
+  procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
+  procedure ReverseBytes(var AData; const ALength : Integer);
+  procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
+
 resourcestring
 resourcestring
   SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
   SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
 
 
@@ -3294,6 +3334,28 @@ begin
   AddLine(ANonNativeEndianStream,'  );' + sLineBreak);
   AddLine(ANonNativeEndianStream,'  );' + sLineBreak);
 end;
 end;
 
 
+// Writes the BMP lookup tables in binary form to both streams.
+// The first table is written unchanged to both streams; every item of the
+// second table is written as is to ANativeEndianStream and with its bytes
+// reversed to ANonNativeEndianStream.
+procedure GenerateBinaryUCA_BmpTables(
+        ANativeEndianStream,
+        ANonNativeEndianStream : TStream;
+  var   AFirstTable            : TucaBmpFirstTable;
+  var   ASecondTable           : TucaBmpSecondTable
+);
+var
+  i, j : Integer;
+  value : UInt24;
+begin
+  // NOTE(review): Length() is used as the byte count here, which presumes
+  // one-byte items in the first table - confirm against its declaration.
+  ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
+  ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
+  for i := Low(ASecondTable) to High(ASecondTable) do begin
+    for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
+      value := ASecondTable[i][j];
+      ANativeEndianStream.Write(value,SizeOf(value));
+      ReverseBytes(value,SizeOf(value));
+      ANonNativeEndianStream.Write(value,SizeOf(value));
+    end;
+  end;
+end;
+
 procedure GenerateUCA_PropTable(
 procedure GenerateUCA_PropTable(
 // WARNING : files must be generated for each endianess (Little / Big)
 // WARNING : files must be generated for each endianess (Little / Big)
         ADest     : TStream;
         ADest     : TStream;
@@ -3336,6 +3398,17 @@ begin
   AddLine('  );' + sLineBreak);
   AddLine('  );' + sLineBreak);
 end;
 end;
 
 
+// Writes the property table of APropBook in binary form : Items goes to
+// ANativeEndianStream and ItemsOtherEndian to ANonNativeEndianStream, each
+// with a length of ItemSize bytes.
+procedure GenerateBinaryUCA_PropTable(
+// WARNING : files must be generated for each endianess (Little / Big)
+        ANativeEndianStream,
+        ANonNativeEndianStream : TStream;
+  const APropBook              : PUCA_PropBook
+);
+begin
+  ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
+  ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
+end;
+
 procedure GenerateUCA_OBmpTables(
 procedure GenerateUCA_OBmpTables(
         AStream,
         AStream,
         ANativeEndianStream,
         ANativeEndianStream,
@@ -3410,7 +3483,34 @@ begin
   AddLine(ANonNativeEndianStream,'  );' + sLineBreak);
   AddLine(ANonNativeEndianStream,'  );' + sLineBreak);
 end;
 end;
 
 
-//-------------------------------------------
+// Writes the lookup tables for code points outside the BMP in binary form.
+// Every item of both tables is written as is to ANativeEndianStream and
+// with its bytes reversed to ANonNativeEndianStream. The first table is
+// written as Word items, the second one as UInt24 items.
+procedure GenerateBinaryUCA_OBmpTables(
+        ANativeEndianStream,
+        ANonNativeEndianStream : TStream;
+  var   AFirstTable            : TucaOBmpFirstTable;
+  var   ASecondTable           : TucaOBmpSecondTable
+);
+var
+  i, j : Integer;
+  wordValue : Word;
+  value : UInt24;
+begin
+  for i := Low(AFirstTable) to High(AFirstTable) do begin
+    // Work on a copy : ReverseBytes modifies its argument in place.
+    wordValue := AFirstTable[i];
+    ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
+    ReverseBytes(wordValue,SizeOf(wordValue));
+    ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
+  end;
+
+  for i := Low(ASecondTable) to High(ASecondTable) do begin
+    for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
+      value := ASecondTable[i][j];
+      ANativeEndianStream.Write(value,SizeOf(value));
+      ReverseBytes(value,SizeOf(value));
+      ANonNativeEndianStream.Write(value,SizeOf(value));
+    end;
+  end;
+end;
 
 
 type
 type
   POBmpSecondTableItem = ^TOBmpSecondTableItem;
   POBmpSecondTableItem = ^TOBmpSecondTableItem;
@@ -4103,6 +4203,17 @@ begin
   Result := r;
   Result := r;
 end;
 end;
 
 
+// Converts a serialized collation header to the other byte order, in place.
+// Only the multi-byte numeric fields are byte-swapped; the name strings and
+// the single-byte fields are left untouched.
+procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
+begin
+  ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
+  ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
+  ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
+  ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
+  ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
+  ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
+  ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
+end;
+
 procedure ReverseBytes(var AData; const ALength : Integer);
 procedure ReverseBytes(var AData; const ALength : Integer);
 var
 var
   i,j : PtrInt;
   i,j : PtrInt;

+ 40 - 1
utils/unicode/unihelper.lpr

@@ -31,7 +31,7 @@
 program unihelper;
 program unihelper;
 
 
 {$mode objfpc}{$H+}
 {$mode objfpc}{$H+}
-{$typedadress on}
+{$typedaddress on}
 
 
 uses
 uses
   SysUtils, Classes,
   SysUtils, Classes,
@@ -66,6 +66,7 @@ end;
 var
 var
   dataPath, outputPath : string;
   dataPath, outputPath : string;
   stream, binStreamNE, binStreamOE, tmpStream : TMemoryStream;
   stream, binStreamNE, binStreamOE, tmpStream : TMemoryStream;
+  binaryStreamNE, binaryStreamOE : TMemoryStream;
   hangulSyllables : TCodePointRecArray;
   hangulSyllables : TCodePointRecArray;
   ucaBook : TUCA_DataBook;
   ucaBook : TUCA_DataBook;
   ucaPropBook : PUCA_PropBook;
   ucaPropBook : PUCA_PropBook;
@@ -95,6 +96,7 @@ var
   ucaoFirstTable   : TucaoBmpFirstTable;
   ucaoFirstTable   : TucaoBmpFirstTable;
   ucaoSecondTable  : TucaOBmpSecondTable;
   ucaoSecondTable  : TucaOBmpSecondTable;
   WL : Integer;
   WL : Integer;
+  serializedHeader : TSerializedCollationHeader;
 begin
 begin
   WriteLn(SUsage+sLineBreak);
   WriteLn(SUsage+sLineBreak);
   if (ParamCount > 0) then
   if (ParamCount > 0) then
@@ -125,6 +127,8 @@ begin
     Halt(1);
     Halt(1);
   end;
   end;
 
 
+  binaryStreamNE := nil;
+  binaryStreamOE := nil;
   binStreamOE := nil;
   binStreamOE := nil;
   binStreamNE := nil;
   binStreamNE := nil;
   tmpStream := nil;
   tmpStream := nil;
@@ -206,6 +210,8 @@ begin
 {$IFDEF UCA_TEST}
 {$IFDEF UCA_TEST}
     uca_CheckProp_2y(ucaBook,ucaPropBook,@ucaoFirstTable,@ucaoSecondTable);
     uca_CheckProp_2y(ucaBook,ucaPropBook,@ucaoFirstTable,@ucaoSecondTable);
 {$ENDIF UCA_TEST}
 {$ENDIF UCA_TEST}
+    binaryStreamNE := TMemoryStream.Create();
+    binaryStreamOE := TMemoryStream.Create();
     WriteLn('Generate UCA Props tables ...');
     WriteLn('Generate UCA Props tables ...');
     binStreamNE.Clear();
     binStreamNE.Clear();
     binStreamOE.Clear();
     binStreamOE.Clear();
@@ -226,6 +232,37 @@ begin
       binStreamOE.SaveToFile(GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
       binStreamOE.SaveToFile(GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
     binStreamNE.Clear();
     binStreamNE.Clear();
     binStreamOE.Clear();
     binStreamOE.Clear();
+// Binary DUCET
+    FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
+    serializedHeader.Version := ucaBook.Version;
+    serializedHeader.CollationName := 'DUCET';//'Default Unicode Collation Element Table (DUCET)';
+    serializedHeader.VariableWeight := Ord(ucaBook.VariableWeight);
+    SetBit(serializedHeader.Backwards,0,ucaBook.Backwards[0]);
+    SetBit(serializedHeader.Backwards,1,ucaBook.Backwards[1]);
+    SetBit(serializedHeader.Backwards,2,ucaBook.Backwards[2]);
+    SetBit(serializedHeader.Backwards,3,ucaBook.Backwards[3]);
+    serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
+    serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
+                                         (Length(ucaSecondTable) * SizeOf(UInt24));
+    serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
+    serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
+                                         (Length(ucaoSecondTable) * SizeOf(UInt24));
+    serializedHeader.PropCount := ucaPropBook^.ItemSize;
+    serializedHeader.VariableLowLimit := ucaPropBook^.VariableLowLimit;
+    serializedHeader.VariableHighLimit := ucaPropBook^.VariableHighLimit;
+    binaryStreamNE.Write(serializedHeader,SizeOf(serializedHeader));
+    ReverseRecordBytes(serializedHeader);
+    binaryStreamOE.Write(serializedHeader,SizeOf(serializedHeader));
+      GenerateBinaryUCA_BmpTables(binaryStreamNE,binaryStreamOE,ucaFirstTable,ucaSecondTable);
+      GenerateBinaryUCA_OBmpTables(binaryStreamNE,binaryStreamOE,ucaoFirstTable,ucaoSecondTable);
+      GenerateBinaryUCA_PropTable(binaryStreamNE,binaryStreamOE,ucaPropBook);
+    binaryStreamNE.SaveToFile(
+      outputPath + Format('collation_ducet_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]])
+    );
+    binaryStreamOE.SaveToFile(
+      outputPath + Format('collation_ducet_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]])
+    );
+// Binary DUCET - END
 
 
 
 
     stream.Clear();
     stream.Clear();
@@ -386,6 +423,8 @@ begin
     end;
     end;
     stream.SaveToFile(outputPath + 'diff2.txt');
     stream.SaveToFile(outputPath + 'diff2.txt');
   finally
   finally
+    binaryStreamOE.Free();
+    binaryStreamNE.Free();
     tmpStream.Free();
     tmpStream.Free();
     binStreamOE.Free();
     binStreamOE.Free();
     binStreamNE.Free();
     binStreamNE.Free();