Ver Fonte

rtl:
- fix compilation of character.pas by fpc 2.4
- check TCharacter method arguments and raise appropriate EArgumentException or EArgumentOutOfRangeException exceptions
- implement to/from UTF32 conversion based on utf16toutf32 and friends

git-svn-id: trunk@19183 -

paul há 14 anos atrás
pai
commit
e774a20c97
2 ficheiros alterados com 120 adições e 16 exclusões
  1. 115 16
      rtl/objpas/character.pas
  2. 5 0
      rtl/objpas/rtlconst.inc

+ 115 - 16
rtl/objpas/character.pas

@@ -1,6 +1,7 @@
 unit character;
 unit character;
 
 
 interface
 interface
+{$ifndef VER2_4}
 {$mode objfpc}
 {$mode objfpc}
 {$H+}
 {$H+}
 {$PACKENUM 1}
 {$PACKENUM 1}
@@ -51,11 +52,10 @@ type
 
 
   TCharacter = class sealed
   TCharacter = class sealed
   public
   public
-    {class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
-    
+    class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
     class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
     class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
     class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
     class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
-    class function ConvertToUtf32(AHighSurrogate, ALowSurrogate : UnicodeChar; AIndex : Integer) : UCS4Char; overload; static;}
+    class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
     
     
     class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
     class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
     class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
     class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
@@ -112,9 +112,13 @@ type
     class function ToUpper(const AString : UnicodeString) : UnicodeString; overload; static;
     class function ToUpper(const AString : UnicodeString) : UnicodeString; overload; static;
   end;
   end;
 
 
+{$endif VER2_4}
+
 implementation
 implementation
-uses  
-  SysUtils;
+{$ifndef VER2_4}
+uses
+  SysUtils,
+  RtlConsts;
 
 
 type  
 type  
   PUC_Prop = ^TUC_Prop;
   PUC_Prop = ^TUC_Prop;
@@ -133,7 +137,11 @@ const
   LOW_SURROGATE_END    = Word($DFFF); 
   LOW_SURROGATE_END    = Word($DFFF); 
   
   
   HIGH_SURROGATE_BEGIN = Word($D800); 
   HIGH_SURROGATE_BEGIN = Word($D800); 
-  HIGH_SURROGATE_END   = Word($DBFF); 
+  HIGH_SURROGATE_END   = Word($DBFF);
+
+  UCS4_HALF_BASE       = LongWord($10000);
+  UCS4_HALF_MASK       = Word($3FF);
+  MAX_LEGAL_UTF32      = $10FFFF;
     
     
 const
 const
   LETTER_CATEGORIES = [
   LETTER_CATEGORIES = [
@@ -163,7 +171,7 @@ const
       TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
       TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
     ];
     ];
 
 
-function GetProps(const ACodePoint : Word) : PUC_Prop; //inline;
+class function GetProps(const ACodePoint : Word) : PUC_Prop; inline;
 begin
 begin
   Result:=
   Result:=
     @UC_PROP_ARRAY[
     @UC_PROP_ARRAY[
@@ -172,10 +180,67 @@ begin
          WordRec(ACodePoint).Lo
          WordRec(ACodePoint).Lo
        ]
        ]
      ];
      ];
-end;                 
+end;
 
 
 { TCharacter }
 { TCharacter }
 
 
+class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
+begin
+  if AChar < UCS4_HALF_BASE then
+  begin
+    if IsSurrogate(UnicodeChar(AChar)) then
+      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
+    Result := UnicodeChar(AChar);
+  end
+  else
+  begin
+    if AChar > MAX_LEGAL_UTF32 then
+      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
+    SetLength(Result, 2);
+    AChar := AChar - UCS4_HALF_BASE;
+    Result[1] := UnicodeChar((AChar shr 10) + HIGH_SURROGATE_BEGIN);
+    Result[2] := UnicodeChar((AChar and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
+  end;
+end;
+
+class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
+begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := Word(AString[AIndex]);
+  if IsHighSurrogate(UnicodeChar(Result)) then
+  begin
+    if Length(AString) < Succ(AIndex) then
+      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
+    Result := ConvertToUtf32(UnicodeChar(Result), AString[Succ(AIndex)]);
+  end;
+end;
+
+class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
+begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := Word(AString[AIndex]);
+  if IsHighSurrogate(UnicodeChar(Result)) then
+  begin
+    if Length(AString) < Succ(AIndex) then
+      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
+    Result := ConvertToUtf32(UnicodeChar(Result), AString[Succ(AIndex)]);
+    ACharLength := 2;
+  end
+  else
+    ACharLength := 1;
+end;
+
+class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
+begin
+  if not IsHighSurrogate(AHighSurrogate) then
+    raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
+  if not IsLowSurrogate(ALowSurrogate) then
+    raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AHighSurrogate)]);
+  Result := (UCS4Char(AHighSurrogate) - HIGH_SURROGATE_BEGIN) shl 10 + (UCS4Char(ALowSurrogate) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
+end;
+
 class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double; static;
 class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double; static;
 begin
 begin
   Result := GetProps(Word(AChar))^.NumericValue;
   Result := GetProps(Word(AChar))^.NumericValue;
@@ -186,6 +251,8 @@ class function TCharacter.GetNumericValue(
         AIndex  : Integer
         AIndex  : Integer
 ) : Double; static;
 ) : Double; static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := GetNumericValue(AString[AIndex]);
   Result := GetNumericValue(AString[AIndex]);
 end;  
 end;  
 
 
@@ -199,7 +266,9 @@ class function TCharacter.GetUnicodeCategory(
         AIndex  : Integer
         AIndex  : Integer
 ) : TUnicodeCategory; static;
 ) : TUnicodeCategory; static;
 begin   
 begin   
-  Result := GetUnicodeCategory(AString[AIndex]);  
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := GetUnicodeCategory(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean; static;
 class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean; static;
@@ -212,6 +281,8 @@ class function TCharacter.IsControl(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsControl(AString[AIndex]);
   Result := IsControl(AString[AIndex]);
 end;
 end;
 
 
@@ -225,7 +296,9 @@ class function TCharacter.IsDigit(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
-  Result := IsDigit(AString[AIndex]);   
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := IsDigit(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean; static;
 class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean; static;
@@ -238,7 +311,9 @@ class function TCharacter.IsSurrogate(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
-  Result := IsSurrogate(AString[AIndex]); 
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := IsSurrogate(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean; static;
 class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean; static;
@@ -253,7 +328,9 @@ class function TCharacter.IsHighSurrogate(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
-  Result := IsHighSurrogate(AString[AIndex]); 
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := IsHighSurrogate(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean; static;
 class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean; static;
@@ -268,7 +345,9 @@ class function TCharacter.IsLowSurrogate(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
-  Result := IsLowSurrogate(AString[AIndex]); 
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := IsLowSurrogate(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsSurrogatePair(
 class function TCharacter.IsSurrogatePair(
@@ -290,6 +369,8 @@ class function TCharacter.IsSurrogatePair(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
   Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
 end;
 end;
 
 
@@ -303,6 +384,8 @@ class function TCharacter.IsLetter(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsLetter(AString[AIndex]);
   Result := IsLetter(AString[AIndex]);
 end;
 end;
 
 
@@ -316,7 +399,9 @@ class function TCharacter.IsLetterOrDigit(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
-  Result := IsLetterOrDigit(AString[AIndex]); 
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := IsLetterOrDigit(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean; static;
 class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean; static;
@@ -329,7 +414,9 @@ class function TCharacter.IsLower(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean; static;
 ) : Boolean; static;
 begin        
 begin        
-  Result := IsLower(AString[AIndex]); 
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  Result := IsLower(AString[AIndex]);
 end;
 end;
 
 
 class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean; static;
 class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean; static;
@@ -342,6 +429,8 @@ class function TCharacter.IsNumber(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsNumber(AString[AIndex]);
   Result := IsNumber(AString[AIndex]);
 end;
 end;
 
 
@@ -355,6 +444,8 @@ class function TCharacter.IsPunctuation(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsPunctuation(AString[AIndex]);
   Result := IsPunctuation(AString[AIndex]);
 end;
 end;
 
 
@@ -368,6 +459,8 @@ class function TCharacter.IsSeparator(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsSeparator(AString[AIndex]);
   Result := IsSeparator(AString[AIndex]);
 end;
 end;
 
 
@@ -381,6 +474,8 @@ class function TCharacter.IsSymbol(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsSymbol(AString[AIndex]);
   Result := IsSymbol(AString[AIndex]);
 end;
 end;
 
 
@@ -394,6 +489,8 @@ class function TCharacter.IsUpper(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsUpper(AString[AIndex]);
   Result := IsUpper(AString[AIndex]);
 end;
 end;
 
 
@@ -407,6 +504,8 @@ class function TCharacter.IsWhiteSpace(
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;static;
 ) : Boolean;static;
 begin
 begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
   Result := IsWhiteSpace(AString[AIndex]);
   Result := IsWhiteSpace(AString[AIndex]);
 end;
 end;
 
 
@@ -459,5 +558,5 @@ begin
     end;
     end;
   end;
   end;
 end;
 end;
-
+{$endif VER2_4}
 end.
 end.

+ 5 - 0
rtl/objpas/rtlconst.inc

@@ -289,6 +289,11 @@ ResourceString
   sWindowsSocketError           = 'A Windows socket error occurred: %s (%d), on API "%s"';
   sWindowsSocketError           = 'A Windows socket error occurred: %s (%d), on API "%s"';
   SWriteError                   = 'Stream write error';
   SWriteError                   = 'Stream write error';
   SYesButton                    = '&Yes';
   SYesButton                    = '&Yes';
+  SStringIndexOutOfRange        = 'String index %d out of range [1 - %d]';
+  SHighSurrogateOutOfRange      = 'High surrogate $%x out of range [$D800 - $DBFF]';
+  SLowSurrogateOutOfRange       = 'Low surrogate $%x out of range [$DC00 - $DFFF]';
+  SInvalidUTF32Char             = 'Invalid UTF32 character $%x. Valid UTF32 character must be in range [$0 - $10FFFF] except surrogate range [$D800-$DFFF]';
+  SInvalidHighSurrogate         = 'Invalid high surrogate at index %d. High surrogate must be followed by a low surrogate pair';
 
 
 { ---------------------------------------------------------------------
 { ---------------------------------------------------------------------
     Keysim Names
     Keysim Names