Browse Source

rtl: patch from Inoussa to complete TCharacter class implementation (mantis #0020302):
* ToLower is functional for all characters including those outside of the BMP
* ToUpper is functional for all characters including those outside of the BMP
* Other methods using "const AString : UnicodeString; AIndex : Integer" are
functional for all characters including those outside of the BMP

git-svn-id: trunk@19286 -

paul 14 years ago
parent
commit
647218fd13

+ 6 - 0
.gitattributes

@@ -10627,10 +10627,14 @@ tests/test/umacpas1.pp svneol=native#text/plain
 tests/test/umainnam.pp svneol=native#text/plain
 tests/test/umainnam.pp svneol=native#text/plain
 tests/test/units/character/tgetnumericvalue.pp svneol=native#text/pascal
 tests/test/units/character/tgetnumericvalue.pp svneol=native#text/pascal
 tests/test/units/character/tgetnumericvalue2.pp svneol=native#text/pascal
 tests/test/units/character/tgetnumericvalue2.pp svneol=native#text/pascal
+tests/test/units/character/tgetnumericvalue3.pp svneol=native#text/pascal
+tests/test/units/character/tgetunicodecategoriesurro.pp svneol=native#text/pascal
 tests/test/units/character/tiscontrol.pp svneol=native#text/pascal
 tests/test/units/character/tiscontrol.pp svneol=native#text/pascal
 tests/test/units/character/tiscontrol2.pp svneol=native#text/pascal
 tests/test/units/character/tiscontrol2.pp svneol=native#text/pascal
+tests/test/units/character/tiscontrol3.pp svneol=native#text/pascal
 tests/test/units/character/tisdigit.pp svneol=native#text/pascal
 tests/test/units/character/tisdigit.pp svneol=native#text/pascal
 tests/test/units/character/tisdigit2.pp svneol=native#text/pascal
 tests/test/units/character/tisdigit2.pp svneol=native#text/pascal
+tests/test/units/character/tisdigit3.pp svneol=native#text/pascal
 tests/test/units/character/tishighsurrogate.pp svneol=native#text/pascal
 tests/test/units/character/tishighsurrogate.pp svneol=native#text/pascal
 tests/test/units/character/tisletter.pp svneol=native#text/pascal
 tests/test/units/character/tisletter.pp svneol=native#text/pascal
 tests/test/units/character/tisletter2.pp svneol=native#text/pascal
 tests/test/units/character/tisletter2.pp svneol=native#text/pascal
@@ -10652,8 +10656,10 @@ tests/test/units/character/tlowercase.pp svneol=native#text/pascal
 tests/test/units/character/tlowercase2.pp svneol=native#text/pascal
 tests/test/units/character/tlowercase2.pp svneol=native#text/pascal
 tests/test/units/character/ttolower.pp svneol=native#text/pascal
 tests/test/units/character/ttolower.pp svneol=native#text/pascal
 tests/test/units/character/ttolower2.pp svneol=native#text/pascal
 tests/test/units/character/ttolower2.pp svneol=native#text/pascal
+tests/test/units/character/ttolower3.pp svneol=native#text/pascal
 tests/test/units/character/ttoupper.pp svneol=native#text/pascal
 tests/test/units/character/ttoupper.pp svneol=native#text/pascal
 tests/test/units/character/ttoupper2.pp svneol=native#text/pascal
 tests/test/units/character/ttoupper2.pp svneol=native#text/pascal
+tests/test/units/character/ttoupper3.pp svneol=native#text/pascal
 tests/test/units/character/tutf32convert.pp svneol=native#text/pascal
 tests/test/units/character/tutf32convert.pp svneol=native#text/pascal
 tests/test/units/classes/tmakeobjinst.pp svneol=native#text/plain
 tests/test/units/classes/tmakeobjinst.pp svneol=native#text/plain
 tests/test/units/classes/tsetstream.pp svneol=native#text/plain
 tests/test/units/classes/tsetstream.pp svneol=native#text/plain

+ 160 - 187
rtl/objpas/character.pas

@@ -47,10 +47,14 @@ type
     ucPrivateUse,                  // Co = Other, private use
     ucPrivateUse,                  // Co = Other, private use
     ucUnassigned                   // Cn = Other, not assigned (including noncharacters)  
     ucUnassigned                   // Cn = Other, not assigned (including noncharacters)  
   );
   );
+  TUnicodeCategorySet = set of TUnicodeCategory;
 
 
   { TCharacter }
   { TCharacter }
 
 
   TCharacter = class sealed
   TCharacter = class sealed
+  private
+    class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
+    class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
   public
   public
     constructor Create;
     constructor Create;
 
 
@@ -62,49 +66,49 @@ type
     class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
     class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
     class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
     class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
     
     
-    class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static;
+    class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
     class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;
     class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;
     
     
-    class function IsControl(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
+    class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
     
     
-    class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;                  
+    class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
     
     
-    class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static;
+    class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
     class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;  
     class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;  
-    class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static;
+    class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
     class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;  
     class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;  
-    class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static;
+    class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
     class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
     class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
-    class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline; inline;
     class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
     class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
     
     
-    class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;  
+    class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
     
     
-    class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;         
+    class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
     
     
-    class function IsLower(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
+    class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline; inline;
+    class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
 
 
-    class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static;
+    class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
     class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
     class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
 
 
-    class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
+    class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
 
 
-    class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
+    class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
 
 
-    class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
+    class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
 
 
-    class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static;
-    class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
+    class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
+    class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
 
 
-    class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static;
+    class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
     class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
     class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
 
 
     class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
     class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
@@ -457,6 +461,44 @@ end;
 
 
 { TCharacter }
 { TCharacter }
 
 
+class function TCharacter.TestCategory(
+  const AString : UnicodeString;
+        AIndex  : Integer;
+        ACategory : TUnicodeCategory
+) : Boolean;
+var
+  pu : PUC_Prop;
+begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  pu := GetProps(Word(AString[AIndex]));
+  if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
+    if not IsSurrogatePair(AString,AIndex) then
+      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
+    pu := GetProps(AString[AIndex],AString[AIndex+1]);
+  end;
+  Result := (pu^.Category = ACategory);
+end;
+
+class function TCharacter.TestCategory(
+  const AString : UnicodeString;
+        AIndex : Integer;
+        ACategory : TUnicodeCategorySet
+) : Boolean;
+var
+  pu : PUC_Prop;
+begin
+  if (AIndex < 1) or (AIndex > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  pu := GetProps(Word(AString[AIndex]));
+  if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
+    if not IsSurrogatePair(AString,AIndex) then
+      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
+    pu := GetProps(AString[AIndex],AString[AIndex+1]);
+  end;
+  Result := (pu^.Category in ACategory);
+end;
+
 constructor TCharacter.Create;
 constructor TCharacter.Create;
 begin
 begin
   raise ENoConstructException.CreateFmt(SClassCantBeConstructed, [ClassName]);
   raise ENoConstructException.CreateFmt(SClassCantBeConstructed, [ClassName]);
@@ -528,10 +570,18 @@ class function TCharacter.GetNumericValue(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
 ) : Double;
 ) : Double;
+var
+  pu : PUC_Prop;
 begin
 begin
   if (AIndex < 1) or (AIndex > Length(AString)) then
   if (AIndex < 1) or (AIndex > Length(AString)) then
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  Result := GetNumericValue(AString[AIndex]);
+  pu := GetProps(Word(AString[AIndex]));
+  if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
+    if not IsSurrogatePair(AString,AIndex) then
+      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
+    pu := GetProps(AString[AIndex],AString[AIndex+1]);
+  end;
+  Result := pu^.NumericValue;
 end;  
 end;  
 
 
 class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
 class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
@@ -543,10 +593,18 @@ class function TCharacter.GetUnicodeCategory(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
 ) : TUnicodeCategory;
 ) : TUnicodeCategory;
+var
+  pu : PUC_Prop;
 begin   
 begin   
   if (AIndex < 1) or (AIndex > Length(AString)) then
   if (AIndex < 1) or (AIndex > Length(AString)) then
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  Result := GetUnicodeCategory(AString[AIndex]);
+  pu := GetProps(Word(AString[AIndex]));
+  if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
+    if not IsSurrogatePair(AString,AIndex) then
+      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
+    pu := GetProps(AString[AIndex],AString[AIndex+1]);
+  end;
+  Result := pu^.Category;
 end;
 end;
 
 
 class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
@@ -558,10 +616,8 @@ class function TCharacter.IsControl(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;
 ) : Boolean;
-begin        
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  Result := IsControl(AString[AIndex]);
+begin
+  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucControl);
 end;
 end;
 
 
 class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
@@ -573,10 +629,8 @@ class function TCharacter.IsDigit(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;
 ) : Boolean;
-begin        
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  Result := IsDigit(AString[AIndex]);
+begin
+  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucDecimalNumber);
 end;
 end;
 
 
 class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
@@ -649,6 +703,12 @@ class function TCharacter.IsSurrogatePair(
 begin
 begin
   if (AIndex < 1) or (AIndex > Length(AString)) then
   if (AIndex < 1) or (AIndex > Length(AString)) then
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
+  if not IsHighSurrogate(AString[AIndex]) then begin
+    Result := False;
+    exit;
+  end;
+  if ((AIndex+1) > Length(AString)) then
+    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex+1, Length(AString)]);
   Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
   Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
 end;
 end;
 
 
@@ -661,23 +721,8 @@ class function TCharacter.IsLetter(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;
 ) : Boolean;
-var
-  c : UnicodeChar;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString,Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category in LETTER_CATEGORIES)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsLetter(c);
+  Result := TestCategory(AString,AIndex,LETTER_CATEGORIES);
 end;
 end;
 
 
 class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
@@ -688,24 +733,9 @@ end;
 class function TCharacter.IsLetterOrDigit(
 class function TCharacter.IsLetterOrDigit(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
-) : Boolean;     
-var
-  c : UnicodeChar;
+) : Boolean;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category in LETTER_OR_DIGIT_CATEGORIES)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsLetterOrDigit(c);           
+  Result := TestCategory(AString,AIndex,LETTER_OR_DIGIT_CATEGORIES);
 end;
 end;
 
 
 class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
@@ -717,23 +747,8 @@ class function TCharacter.IsLower(
   const AString : UnicodeString;  
   const AString : UnicodeString;  
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;
 ) : Boolean;
-var
-  c : UnicodeChar;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category = TUnicodeCategory.ucLowercaseLetter)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsLower(c);
+  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucLowercaseLetter);
 end;
 end;
 
 
 class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
@@ -745,23 +760,8 @@ class function TCharacter.IsNumber(
   const AString : UnicodeString;
   const AString : UnicodeString;
         AIndex  : Integer
         AIndex  : Integer
 ) : Boolean;
 ) : Boolean;
-var
-  c : UnicodeChar;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category in NUMBER_CATEGORIES)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsNumber(c);
+  Result := TestCategory(AString,AIndex,NUMBER_CATEGORIES);
 end;
 end;
 
 
 class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
@@ -772,24 +772,9 @@ end;
 class function TCharacter.IsPunctuation(
 class function TCharacter.IsPunctuation(
   const AString : UnicodeString;
   const AString : UnicodeString;
         AIndex  : Integer
         AIndex  : Integer
-) : Boolean;   
-var
-  c : UnicodeChar;
+) : Boolean;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category in PUNCTUATION_CATEGORIES)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsPunctuation(c);   
+  Result := TestCategory(AString,AIndex,PUNCTUATION_CATEGORIES);
 end;
 end;
 
 
 class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
 class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
@@ -800,24 +785,9 @@ end;
 class function TCharacter.IsSeparator(
 class function TCharacter.IsSeparator(
   const AString : UnicodeString;
   const AString : UnicodeString;
         AIndex  : Integer
         AIndex  : Integer
-) : Boolean;          
-var
-  c : UnicodeChar;
+) : Boolean;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category in SEPARATOR_CATEGORIES)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsSeparator(c);  
+  Result := TestCategory(AString,AIndex,SEPARATOR_CATEGORIES);
 end;
 end;
 
 
 class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
 class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
@@ -828,24 +798,9 @@ end;
 class function TCharacter.IsSymbol(
 class function TCharacter.IsSymbol(
   const AString : UnicodeString;
   const AString : UnicodeString;
         AIndex  : Integer
         AIndex  : Integer
-) : Boolean;                   
-var
-  c : UnicodeChar;
+) : Boolean;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := (GetProps(c, AString[Succ(AIndex)])^.Category in SYMBOL_CATEGORIES)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsSymbol(c);      
+  Result := TestCategory(AString,AIndex,SYMBOL_CATEGORIES);
 end;
 end;
 
 
 class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
@@ -856,24 +811,9 @@ end;
 class function TCharacter.IsUpper(
 class function TCharacter.IsUpper(
   const AString : UnicodeString;
   const AString : UnicodeString;
         AIndex  : Integer
         AIndex  : Integer
-) : Boolean;                        
-var
-  c : UnicodeChar;
+) : Boolean;
 begin
 begin
-  if (AIndex < 1) or (AIndex > Length(AString)) then
-    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString,Succ(AIndex)) then
-      Result := (GetProps(c,AString[Succ(AIndex)])^.Category = TUnicodeCategory.ucUppercaseLetter)
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsUpper(c);  
+  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucUppercaseLetter);
 end;
 end;
 
 
 class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
 class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
@@ -884,24 +824,19 @@ end;
 class function TCharacter.IsWhiteSpace(
 class function TCharacter.IsWhiteSpace(
   const AString : UnicodeString;
   const AString : UnicodeString;
         AIndex  : Integer
         AIndex  : Integer
-) : Boolean;                           
+) : Boolean;
 var
 var
-  c : UnicodeChar;
+  pu : PUC_Prop;
 begin
 begin
   if (AIndex < 1) or (AIndex > Length(AString)) then
   if (AIndex < 1) or (AIndex > Length(AString)) then
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
     raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
-  c := AString[AIndex];
-  if IsHighSurrogate(c) then
-  begin
-    if Length(AString) < Succ(AIndex) then
-      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
-    if IsLowSurrogate(AString, Succ(AIndex)) then
-      Result := GetProps(c,AString[AIndex+1])^.WhiteSpace
-    else
-      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
-  end
-  else
-    Result := IsWhiteSpace(c);   
+  pu := GetProps(Word(AString[AIndex]));
+  if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
+    if not IsSurrogatePair(AString,AIndex) then
+      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
+    pu := GetProps(AString[AIndex],AString[AIndex+1]);
+  end;
+  Result := pu^.WhiteSpace;
 end;
 end;
 
 
 class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
 class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
@@ -937,22 +872,27 @@ begin
         if locIsSurrogate then begin
         if locIsSurrogate then begin
           Inc(pp);
           Inc(pp);
           Inc(pr);
           Inc(pr);
+          Inc(i);
           pr^ := pp^;
           pr^ := pp^;
         end;
         end;
       end else begin
       end else begin
         if (pu^.SimpleLowerCase <= $FFFF) then begin
         if (pu^.SimpleLowerCase <= $FFFF) then begin
           pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
           pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
         end else begin
         end else begin
-          FromUCS4(UCS4Char(pu^.SimpleLowerCase),pr^,(pr+1)^);
+          FromUCS4(UCS4Char(pu^.SimpleLowerCase),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
           Inc(pr);
           Inc(pr);
         end;
         end;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(i);
+        end;
       end;
       end;
       Inc(pp);
       Inc(pp);
       Inc(pr);
       Inc(pr);
       Inc(i);
       Inc(i);
     end;
     end;
     Dec(pp);
     Dec(pp);
-    i := ((pr - (@Result[1])) div SizeOf(UnicodeChar));
+    i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
     SetLength(Result,i)
     SetLength(Result,i)
   end;
   end;
 end;
 end;
@@ -968,17 +908,50 @@ class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString
 var
 var
   i, c : SizeInt;
   i, c : SizeInt;
   pp, pr : PUnicodeChar;
   pp, pr : PUnicodeChar;
+  pu : PUC_Prop;
+  locIsSurrogate : Boolean;
 begin
 begin
   c := Length(AString);
   c := Length(AString);
-  SetLength(Result,c);
+  SetLength(Result,2*c);
   if (c > 0) then begin
   if (c > 0) then begin
     pp := @AString[1];
     pp := @AString[1];
     pr := @Result[1];
     pr := @Result[1];
-    for i := 1 to c do begin
-      pr^ := ToUpper(pp^);
+    i := 1;
+    while (i <= c) do begin
+      pu := GetProps(Word(pp^));
+      locIsSurrogate := (pu^.Category = TUnicodeCategory.ucSurrogate);
+      if locIsSurrogate then begin
+        if not IsSurrogatePair(AString,i) then
+          raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
+        pu := GetProps(pp^,AString[i+1]);
+      end;
+      if (pu^.SimpleUpperCase = 0) then begin
+        pr^ := pp^;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(pr);
+          Inc(i);
+          pr^ := pp^;
+        end;
+      end else begin
+        if (pu^.SimpleUpperCase <= $FFFF) then begin
+          pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
+        end else begin
+          FromUCS4(UCS4Char(pu^.SimpleUpperCase),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
+          Inc(pr);
+        end;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(i);
+        end;
+      end;
       Inc(pp);
       Inc(pp);
       Inc(pr);
       Inc(pr);
+      Inc(i);
     end;
     end;
+    Dec(pp);
+    i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
+    SetLength(Result,i)
   end;
   end;
 end;
 end;
 {$endif VER2_4}
 {$endif VER2_4}

+ 61 - 0
tests/test/units/character/tgetnumericvalue3.pp

@@ -0,0 +1,61 @@
+program tgetnumericvalue3;
+
+{$ifdef FPC}
+  {$mode objfpc}
+  {$H+}
+  {$PACKENUM 1}
+{$endif fpc} 
+
+{$ifndef FPC}
+  {$APPTYPE CONSOLE}    
+{$endif}
+  
+uses     
+  SysUtils,
+  character;
+    
+{$ifndef FPC}
+  type UnicodeChar = WideChar;   
+{$endif} 
+
+function DumpStr(a : UnicodeString) : UnicodeString;
+var
+  i : Integer;
+  s : UnicodeString;
+begin
+  s := '';
+  for i := 1 to Length(a) do
+    s := s + Format('#%x',[Word(a[i])]);
+  Result := s; 
+end;
+    
+procedure DoError(ACode : Integer; ACodePoint : UnicodeString); overload;
+begin
+  WriteLn('Error #',ACode,' ; String = ',DumpStr(ACodePoint));
+  Halt(Acode);
+end;         
+
+var
+  e : Integer;
+  s, s2, s3 : UnicodeString;
+  d : Double;
+begin  
+  e := 1; 
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA1));
+  d := 1;
+  if (TCharacter.GetNumericValue(s,1) <> d) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.GetNumericValue(s) = ',TCharacter.GetNumericValue(s,1));
+    DoError(e,s);
+  end;  
+
+  Inc(e);
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA3));
+  d := 3;
+  if (TCharacter.GetNumericValue(s,1) <> d) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.GetNumericValue(s) = ',TCharacter.GetNumericValue(s,1));
+    DoError(e,s);
+  end;  
+  
+  WriteLn('ok');
+end.
+

+ 58 - 0
tests/test/units/character/tgetunicodecategoriesurro.pp

@@ -0,0 +1,58 @@
+program tgetunicodecategoriesurro;
+
+{$ifdef FPC}
+  {$mode objfpc}
+  {$H+}
+  {$PACKENUM 1}
+{$endif fpc} 
+
+{$ifndef FPC}
+  {$APPTYPE CONSOLE}    
+{$endif}
+  
+uses     
+  SysUtils,
+  character;
+    
+{$ifndef FPC}
+  type UnicodeChar = WideChar;   
+{$endif} 
+
+function DumpStr(a : UnicodeString) : UnicodeString;
+var
+  i : Integer;
+  s : UnicodeString;
+begin
+  s := '';
+  for i := 1 to Length(a) do
+    s := s + Format('#%x',[Word(a[i])]);
+  Result := s; 
+end;
+    
+procedure DoError(ACode : Integer; ACodePoint : UnicodeString); overload;
+begin
+  WriteLn('Error #',ACode,' ; String = ',DumpStr(ACodePoint));
+  Halt(Acode);
+end;         
+
+var
+  e : Integer;
+  s : UnicodeString;
+begin  
+  e := 1; 
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA1));
+  if (TCharacter.GetUnicodeCategory(s,1) <> TUnicodeCategory.ucDecimalNumber) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.GetUnicodeCategory(s) = ',TCharacter.GetUnicodeCategory(s,1));
+    DoError(e,s);
+  end;  
+
+  Inc(e);
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA3));
+  if (TCharacter.GetUnicodeCategory(s,1) <> TUnicodeCategory.ucDecimalNumber) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.GetUnicodeCategory(s) = ',TCharacter.GetUnicodeCategory(s,1));
+    DoError(e,s);
+  end;  
+  
+  WriteLn('ok');
+end.
+

+ 58 - 0
tests/test/units/character/tiscontrol3.pp

@@ -0,0 +1,58 @@
+program tisdigit3;
+
+{$ifdef FPC}
+  {$mode objfpc}
+  {$H+}
+  {$PACKENUM 1}
+{$endif fpc} 
+
+{$ifndef FPC}
+  {$APPTYPE CONSOLE}    
+{$endif}
+  
+uses     
+  SysUtils,
+  character;
+    
+{$ifndef FPC}
+  type UnicodeChar = WideChar;   
+{$endif} 
+
+function DumpStr(a : UnicodeString) : UnicodeString;
+var
+  i : Integer;
+  s : UnicodeString;
+begin
+  s := '';
+  for i := 1 to Length(a) do
+    s := s + Format('#%x',[Word(a[i])]);
+  Result := s; 
+end;
+    
+procedure DoError(ACode : Integer; ACodePoint : UnicodeString); overload;
+begin
+  WriteLn('Error #',ACode,' ; String = ',DumpStr(ACodePoint));
+  Halt(Acode);
+end;         
+
+var
+  e : Integer;
+  s : UnicodeString;
+begin  
+  e := 1; 
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA1));
+  if not TCharacter.IsDigit(s,1) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.IsDigit(s) = ',TCharacter.IsDigit(s,1));
+    DoError(e,s);
+  end;  
+
+  Inc(e);
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA3));
+  if not TCharacter.IsDigit(s,1) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.IsDigit(s) = ',TCharacter.IsDigit(s,1));
+    DoError(e,s);
+  end;  
+  
+  WriteLn('ok');
+end.
+

+ 61 - 0
tests/test/units/character/tisdigit3.pp

@@ -0,0 +1,61 @@
+program tgetnumericvalue3;
+
+{$ifdef FPC}
+  {$mode objfpc}
+  {$H+}
+  {$PACKENUM 1}
+{$endif fpc} 
+
+{$ifndef FPC}
+  {$APPTYPE CONSOLE}    
+{$endif}
+  
+uses     
+  SysUtils,
+  character;
+    
+{$ifndef FPC}
+  type UnicodeChar = WideChar;   
+{$endif} 
+
+function DumpStr(a : UnicodeString) : UnicodeString;
+var
+  i : Integer;
+  s : UnicodeString;
+begin
+  s := '';
+  for i := 1 to Length(a) do
+    s := s + Format('#%x',[Word(a[i])]);
+  Result := s; 
+end;
+    
+procedure DoError(ACode : Integer; ACodePoint : UnicodeString); overload;
+begin
+  WriteLn('Error #',ACode,' ; String = ',DumpStr(ACodePoint));
+  Halt(Acode);
+end;         
+
+var
+  e : Integer;
+  s, s2, s3 : UnicodeString;
+  d : Double;
+begin  
+  e := 1; 
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA1));
+  d := 1;
+  if (TCharacter.GetNumericValue(s,1) <> d) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.GetNumericValue(s) = ',TCharacter.GetNumericValue(s,1));
+    DoError(e,s);
+  end;  
+
+  Inc(e);
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DCA3));
+  d := 3;
+  if (TCharacter.GetNumericValue(s,1) <> d) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.GetNumericValue(s) = ',TCharacter.GetNumericValue(s,1));
+    DoError(e,s);
+  end;  
+  
+  WriteLn('ok');
+end.
+

+ 61 - 0
tests/test/units/character/ttolower3.pp

@@ -0,0 +1,61 @@
+program ttolower3;
+
+{$ifdef FPC}
+  {$mode objfpc}
+  {$H+}
+  {$PACKENUM 1}
+{$endif fpc} 
+
+{$ifndef FPC}
+  {$APPTYPE CONSOLE}    
+{$endif}
+  
+uses     
+  SysUtils,
+  character;
+    
+{$ifndef FPC}
+  type UnicodeChar = WideChar;   
+{$endif} 
+
+function DumpStr(a : UnicodeString) : UnicodeString;
+var
+  i : Integer;
+  s : UnicodeString;
+begin
+  s := '';
+  for i := 1 to Length(a) do
+    s := s + Format('#%x',[Word(a[i])]);
+  Result := s; 
+end;
+    
+procedure DoError(ACode : Integer; ACodePoint : UnicodeString); overload;
+begin
+  WriteLn('Error #',ACode,' ; String = ',DumpStr(ACodePoint));
+  Halt(Acode);
+end;         
+
+var
+  e : Integer;
+  s, s2, s3 : UnicodeString;
+begin  
+  e := 1; 
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DC28));
+  s2 := TCharacter.ToLower(s);
+  if (s2 <> s) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.ToLower(s) = ',DumpStr(s2));
+    DoError(e,TCharacter.ToLower(s));
+  end;  
+
+  Inc(e);
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DC21));
+  s2 := TCharacter.ToLower(s);
+  s3 := UnicodeChar(Word($D801)) + UnicodeChar(Word($DC49));//Actual 
+  if (s2 <> s3) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.ToLower(s) = ',DumpStr(s2),' ; Expected = ',DumpStr(s3));
+    DoError(e,TCharacter.ToLower(s));
+  end;  
+  
+  WriteLn('ok');
+end.
+

+ 61 - 0
tests/test/units/character/ttoupper3.pp

@@ -0,0 +1,61 @@
+program ttoupper3;
+
+{$ifdef FPC}
+  {$mode objfpc}
+  {$H+}
+  {$PACKENUM 1}
+{$endif fpc} 
+
+{$ifndef FPC}
+  {$APPTYPE CONSOLE}    
+{$endif}
+  
+uses     
+  SysUtils,
+  character;
+    
+{$ifndef FPC}
+  type UnicodeChar = WideChar;   
+{$endif} 
+
+function DumpStr(a : UnicodeString) : UnicodeString;
+var
+  i : Integer;
+  s : UnicodeString;
+begin
+  s := '';
+  for i := 1 to Length(a) do
+    s := s + Format('#%x',[Word(a[i])]);
+  Result := s; 
+end;
+    
+procedure DoError(ACode : Integer; ACodePoint : UnicodeString); overload;
+begin
+  WriteLn('Error #',ACode,' ; String = ',DumpStr(ACodePoint));
+  Halt(Acode);
+end;         
+
+var
+  e : Integer;
+  s, s2, s3 : UnicodeString;
+begin  
+  e := 1; 
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DC16));
+  s2 := TCharacter.ToUpper(s);
+  if (s2 <> s) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.ToUpper(s) = ',DumpStr(s2));
+    DoError(e,TCharacter.ToUpper(s));
+  end;  
+
+  Inc(e);
+  s := UnicodeChar(Word($D801)) + UnicodeChar(Word($DC40));
+  s2 := TCharacter.ToUpper(s);
+  s3 := UnicodeChar(Word($D801)) + UnicodeChar(Word($DC18));//Actual 
+  if (s2 <> s3) then begin
+    WriteLn('s=',DumpStr(s),' ; TCharacter.ToUpper(s) = ',DumpStr(s2),' ; Expected = ',DumpStr(s3));
+    DoError(e,TCharacter.ToUpper(s));
+  end;  
+  
+  WriteLn('ok');
+end.
+