Explorar el Código

rtl: apply patch of Inoussa which drops unicode manager dependency on SysUtils (issue #0024887)

git-svn-id: trunk@25312 -
paul hace 12 años
padre
commit
e913ab1f9f
Se han modificado 3 ficheros con 163 adiciones y 122 borrados
  1. 14 116
      rtl/objpas/character.pas
  2. 8 6
      rtl/objpas/fpwidestring.pp
  3. 141 0
      rtl/objpas/unicodedata.pas

+ 14 - 116
rtl/objpas/character.pas

@@ -820,64 +820,13 @@ begin
 end;
 
 class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
-var
-  i, c : SizeInt;
-  pp, pr : PUnicodeChar;
-  pu : PUC_Prop;
-  locIsSurrogate, locIgnoreInvalid : Boolean;
-begin
-  c := Length(AString);
-  SetLength(Result,2*c);
-  if (c > 0) then begin
-    locIgnoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
-    pp := @AString[1];
-    pr := @Result[1];
-    i := 1;
-    while (i <= c) do begin
-      pu := GetProps(Word(pp^));
-      locIsSurrogate := (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate);
-      if locIsSurrogate then begin
-        if locIgnoreInvalid then begin
-          if (i = c) or not(IsSurrogatePair(pp[0],pp[1])) then begin
-            pr^ := pp^;
-            Inc(pp);
-            Inc(pr);
-            Inc(i);
-            Continue;
-          end;
-        end;
-        if not IsSurrogatePair(AString,i) then
-          raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
-        pu := GetProps(pp^,AString[i+1]);
-      end;
-      if (pu^.SimpleLowerCase = 0) then begin
-        pr^ := pp^;
-        if locIsSurrogate then begin
-          Inc(pp);
-          Inc(pr);
-          Inc(i);
-          pr^ := pp^;
-        end;
-      end else begin
-        if (pu^.SimpleLowerCase <= $FFFF) then begin
-          pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
-        end else begin
-          FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
-          Inc(pr);
-        end;
-        if locIsSurrogate then begin
-          Inc(pp);
-          Inc(i);
-        end;
-      end;
-      Inc(pp);
-      Inc(pr);
-      Inc(i);
-    end;
-    Dec(pp);
-    i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
-    SetLength(Result,i)
-  end;
+begin
+  if (UnicodeToLower(
+       AString,(TCharacterOption.coIgnoreInvalidSequence in AOptions),Result
+       ) <> 0
+     )
+  then
+    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
 end;
 
 class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
@@ -893,64 +842,13 @@ begin
 end;
 
 class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
-var
-  i, c : SizeInt;
-  pp, pr : PUnicodeChar;
-  pu : PUC_Prop;
-  locIsSurrogate, locIgnoreInvalid : Boolean;
-begin
-  c := Length(AString);
-  SetLength(Result,2*c);
-  if (c > 0) then begin
-    locIgnoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
-    pp := @AString[1];
-    pr := @Result[1];
-    i := 1;
-    while (i <= c) do begin
-      pu := GetProps(Word(pp^));
-      locIsSurrogate := (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate);
-      if locIsSurrogate then begin
-        if locIgnoreInvalid then begin
-          if (i = c) or not(IsSurrogatePair(pp[0],pp[1])) then begin
-            pr^ := pp^;
-            Inc(pp);
-            Inc(pr);
-            Inc(i);
-            Continue;
-          end;
-        end;
-        if not IsSurrogatePair(AString,i) then
-          raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
-        pu := GetProps(pp^,AString[i+1]);
-      end;
-      if (pu^.SimpleUpperCase = 0) then begin
-        pr^ := pp^;
-        if locIsSurrogate then begin
-          Inc(pp);
-          Inc(pr);
-          Inc(i);
-          pr^ := pp^;
-        end;
-      end else begin
-        if (pu^.SimpleUpperCase <= $FFFF) then begin
-          pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
-        end else begin
-          FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
-          Inc(pr);
-        end;
-        if locIsSurrogate then begin
-          Inc(pp);
-          Inc(i);
-        end;
-      end;
-      Inc(pp);
-      Inc(pr);
-      Inc(i);
-    end;
-    Dec(pp);
-    i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
-    SetLength(Result,i)
-  end;
+begin
+  if (UnicodeToUpper(
+       AString,(TCharacterOption.coIgnoreInvalidSequence in AOptions),Result
+       ) <> 0
+     )
+  then
+    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
 end;
 
 {$endif VER2_4}

+ 8 - 6
rtl/objpas/fpwidestring.pp

@@ -21,7 +21,7 @@ uses
 {$ifdef Unix}
   unixcp,
 {$endif}
-  sysutils, character, charset;
+  charset;
   
 procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
 {$ifdef MSWINDOWS}
@@ -29,7 +29,7 @@ procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
 {$endif MSWINDOWS}
 
 const
-  CharacterOptions = [TCharacterOption.coIgnoreInvalidSequence];
+  IgnoreInvalidSequenceFlag = True;
 var
   OldManager : TUnicodeStringManager;
 
@@ -379,7 +379,8 @@ end;
 
 function UpperUnicodeString(const S: UnicodeString): UnicodeString;
 begin
-  Result:=TCharacter.ToUpper(s,CharacterOptions);
+  if (UnicodeToUpper(S,IgnoreInvalidSequenceFlag,Result) <> 0) then
+    system.error(reRangeError);
 end;
 
 function UpperWideString(const S: WideString): WideString;
@@ -392,7 +393,8 @@ end;
 
 function LowerUnicodeString(const S: UnicodeString): UnicodeString;
 begin
-  Result:=TCharacter.ToLower(s,CharacterOptions);
+  if (UnicodeToLower(S,IgnoreInvalidSequenceFlag,Result) <> 0) then
+    system.error(reRangeError);
 end;
 
 function LowerWideString(const S: WideString): WideString;
@@ -527,7 +529,7 @@ begin
           ulen:=getunicode(p,mblen,locMap,@us[1]);
           if (Length(us)<>ulen) then
             SetLength(us,ulen);
-          usl:=TCharacter.ToUpper(us,CharacterOptions);
+          usl:=UpperUnicodeString(us);
           for k:=1 to Length(usl) do
             begin
               aalen:=getascii(tunicodechar(us[k]),locMap,@aa[Low(aa)],Length(aa));
@@ -598,7 +600,7 @@ begin
           ulen:=getunicode(p,mblen,locMap,@us[1]);
           if (Length(us)<>ulen) then
             SetLength(us,ulen);
-          usl:=TCharacter.ToLower(us,CharacterOptions);
+          usl:=LowerUnicodeString(us);
           for k:=1 to Length(usl) do
             begin
               aalen:=getascii(tunicodechar(us[k]),locMap,@aa[Low(aa)],Length(aa));

+ 141 - 0
rtl/objpas/unicodedata.pas

@@ -284,6 +284,7 @@ type
 
 const
   ROOT_COLLATION_NAME = 'DUCET';
+  ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
 
   procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
   function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
@@ -293,6 +294,16 @@ const
   ) : Boolean;inline;
   function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
   function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
+  function UnicodeToUpper(
+    const AString                : UnicodeString;
+    const AIgnoreInvalidSequence : Boolean;
+    out   AResultString          : UnicodeString
+  ) : Integer;
+  function UnicodeToLower(
+    const AString                : UnicodeString;
+    const AIgnoreInvalidSequence : Boolean;
+    out   AResultString          : UnicodeString
+  ) : Integer;
 
   function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
   function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
@@ -997,7 +1008,137 @@ begin
   Result := GetProps(h,l);
 end;
 
+function UnicodeToUpper(
+  const AString                : UnicodeString;
+  const AIgnoreInvalidSequence : Boolean;
+  out   AResultString          : UnicodeString
+) : Integer;
+var
+  i, c : SizeInt;
+  pp, pr : PUnicodeChar;
+  pu : PUC_Prop;
+  locIsSurrogate : Boolean;
+  r : UnicodeString;
+begin
+  c := Length(AString);
+  SetLength(r,2*c);
+  if (c > 0) then begin
+    pp := @AString[1];
+    pr := @r[1];
+    i := 1;
+    while (i <= c) do begin
+      pu := GetProps(Word(pp^));
+      locIsSurrogate := (pu^.Category = UGC_Surrogate);
+      if locIsSurrogate then begin
+        if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
+          if AIgnoreInvalidSequence then begin
+            pr^ := pp^;
+            Inc(pp);
+            Inc(pr);
+            Inc(i);
+            Continue;
+          end;
+          exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
+        end;
+        pu := GetProps(pp^,AString[i+1]);
+      end;
+      if (pu^.SimpleUpperCase = 0) then begin
+        pr^ := pp^;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(pr);
+          Inc(i);
+          pr^ := pp^;
+        end;
+      end else begin
+        if (pu^.SimpleUpperCase <= $FFFF) then begin
+          pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
+        end else begin
+          FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
+          Inc(pr);
+        end;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(i);
+        end;
+      end;
+      Inc(pp);
+      Inc(pr);
+      Inc(i);
+    end;
+    Dec(pp);
+    i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
+    SetLength(r,i);
+    AResultString := r;
+    Result := 0;
+  end;
+end;
 
+function UnicodeToLower(
+  const AString                : UnicodeString;
+  const AIgnoreInvalidSequence : Boolean;
+  out   AResultString          : UnicodeString
+) : Integer;
+var
+  i, c : SizeInt;
+  pp, pr : PUnicodeChar;
+  pu : PUC_Prop;
+  locIsSurrogate : Boolean;
+  r : UnicodeString;
+begin
+  c := Length(AString);
+  SetLength(r,2*c);
+  if (c > 0) then begin
+    pp := @AString[1];
+    pr := @r[1];
+    i := 1;
+    while (i <= c) do begin
+      pu := GetProps(Word(pp^));
+      locIsSurrogate := (pu^.Category = UGC_Surrogate);
+      if locIsSurrogate then begin
+        if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
+          if AIgnoreInvalidSequence then begin
+            pr^ := pp^;
+            Inc(pp);
+            Inc(pr);
+            Inc(i);
+            Continue;
+          end;
+          exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
+        end;
+        pu := GetProps(pp^,AString[i+1]);
+      end;
+      if (pu^.SimpleLowerCase = 0) then begin
+        pr^ := pp^;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(pr);
+          Inc(i);
+          pr^ := pp^;
+        end;
+      end else begin
+        if (pu^.SimpleLowerCase <= $FFFF) then begin
+          pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
+        end else begin
+          FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
+          Inc(pr);
+        end;
+        if locIsSurrogate then begin
+          Inc(pp);
+          Inc(i);
+        end;
+      end;
+      Inc(pp);
+      Inc(pr);
+      Inc(i);
+    end;
+    Dec(pp);
+    i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
+    SetLength(r,i);
+    AResultString := r;
+    Result := 0;
+  end;
+end;
 
 //----------------------------------------------------------------------
 function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;