Bläddra i källkod

* fixed wchar_t type (was: widechar, now is cint/cint32/long depending on
platform)
+ mbstate_t type for all unixes except BeOS (doesn't exist for BeOS)
+ implemented UpperAnsiStringProc/LowerAnsiStringProc for unix
* fixed Ansi2UCS4Move in cwstring (although it isn't used anywhere
currently)
+ test for Upper/LowerAnsiString

git-svn-id: trunk@9393 -

Jonas Maebe 17 år sedan
förälder
incheckning
68595c8b72
8 ändrade filer med 685 tillägg och 10 borttagningar
  1. 1 0
      .gitattributes
  2. 1 1
      rtl/beos/ptypes.inc
  3. 9 2
      rtl/darwin/ptypes.inc
  4. 8 1
      rtl/freebsd/ptypes.inc
  5. 13 1
      rtl/linux/ptypes.inc
  6. 14 1
      rtl/solaris/ptypes.inc
  7. 300 4
      rtl/unix/cwstring.pp
  8. 339 0
      tests/test/twide6.pp

+ 1 - 0
.gitattributes

@@ -7300,6 +7300,7 @@ tests/test/twide2.pp svneol=native#text/plain
 tests/test/twide3.pp svneol=native#text/plain
 tests/test/twide4.pp svneol=native#text/plain
 tests/test/twide5.pp svneol=native#text/plain
+tests/test/twide6.pp svneol=native#text/plain
 tests/test/twrstr1.pp svneol=native#text/plain
 tests/test/twrstr2.pp svneol=native#text/plain
 tests/test/twrstr3.pp svneol=native#text/plain

+ 1 - 1
rtl/beos/ptypes.inc

@@ -90,7 +90,7 @@ type
     pTime    = ^time_t;
     ptime_t =  ^time_t;
     
-    wchar_t   = widechar;
+    wchar_t   = cint32;
     pwchar_t  = ^wchar_t;
 
     socklen_t= cuint32;

+ 9 - 2
rtl/darwin/ptypes.inc

@@ -77,9 +77,9 @@ type
     pTime    = ^time_t;
     ptime_t  = ^time_t;
 
-    wchar_t  = widechar;
+    wchar_t  = cint32;
     pwchar_t = ^wchar_t;
-    wint_t   = cint;
+    wint_t   = cint32;
 
     socklen_t= cuint32;
     TSocklen = socklen_t;
@@ -163,6 +163,13 @@ type
        end;
     pstatfs = ^tstatfs;
 
+    mbstate_t = record
+      case byte of
+        0: (__mbstate8: array[0..127] of char);
+        1: (_mbstateL: clonglong); { for alignment }
+    end;
+    pmbstate_t = ^mbstate_t;
+
    pthread_t            = pointer;
    pthread_attr_t       = record sig: clong; opaque: array[0..{$ifdef cpu64}56{$else}36{$endif}-1] of byte; end;
    pthread_mutex_t      = {$i pmutext.inc}

+ 8 - 1
rtl/freebsd/ptypes.inc

@@ -80,7 +80,7 @@ type
     pUid     = ^Uid_t;
 
     wint_t    = cint32;
-    wchar_t   = widechar;
+    wchar_t   = cint32;
     pwchar_t  = ^wchar_t;
 
 
@@ -186,6 +186,13 @@ type
   end;
   PStatFS=^TStatFS;
 
+    mbstate_t = record
+      case byte of
+        0: (__mbstate8: array[0..127] of char);
+        1: (_mbstateL: cint64); { for alignment }
+    end;
+    pmbstate_t = ^mbstate_t;
+
   ITimerVal= Record
               It_Interval,
               It_Value      : TimeVal;

+ 13 - 1
rtl/linux/ptypes.inc

@@ -102,7 +102,7 @@ Type
     pTime     = ^time_t;
     ptime_t   = ^time_t;
 
-    wchar_t   = widechar;
+    wchar_t   = cint32;
     pwchar_t  = ^wchar_t;
 
 {$ifdef cpu64}
@@ -154,6 +154,18 @@ Type
   end;
   PStatFS=^TStatFS;
 
+  mbstate_value_t = record
+    case byte of
+      0: (__wch: wint_t);
+      1: (__wchb: array[0..3] of char);
+  end;
+  
+  mbstate_t = record
+    __count: cint;
+    __value: mbstate_value_t;
+  end;
+  pmbstate_t = ^mbstate_t;
+
   pthread_t = culong;
 
   sched_param = record

+ 14 - 1
rtl/solaris/ptypes.inc

@@ -129,7 +129,11 @@ Type
     uint_t    = cuint;
 
 
-    wchar_t   = widechar;
+{$ifdef cpu64}
+    wchar_t   = cint;
+{$else cpu64}
+    wchar_t   = clong;
+{$endif cpu64}
     pwchar_t  = ^wchar_t;
 
     uid_t    = cuint32;         { used for user ID type        }
@@ -168,6 +172,15 @@ Type
   end;
   PStatFS=^TStatFS;
 
+  mbstate_t = record
+{$ifdef cpu64}
+        __filler: array[0..3] of clong;
+{$else cpu64}
+        __filler: array[0..5] of cint;
+{$endif cpu64}
+  end;
+  pmbstate_t = ^mbstate_t;
+  
 
   clock32_t = int32_t;
   timeval32 = record

+ 300 - 4
rtl/unix/cwstring.pp

@@ -14,6 +14,7 @@
  **********************************************************************}
 
 {$mode objfpc}
+{$inline on}
 
 unit cwstring;
 
@@ -45,11 +46,22 @@ Const
 {$endif}
 
 { helper functions from libc }
+function tolower(__wc:cint):cint;cdecl;external libiconvname name 'tolower';
+function toupper(__wc:cint):cint;cdecl;external libiconvname name 'toupper';
 function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
 function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
+
 function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
 function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
 function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
+{$ifndef beos}
+function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
+function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
+{$else beos}
+function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
+function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
+{$endif beos}
+
 
 const
 {$ifdef linux}
@@ -253,6 +265,92 @@ procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
   end;
 
 
+(*
+function LowerWideString(const s : WideString) : WideString;
+  var
+    i, slen   : SizeInt;
+{$if sizeof(wchar_t) = 4}
+    nc        : wint_t;
+    resindex  : SizeInt;
+    len       : longint;
+    valid     : boolean;
+{$elseif sizeof(wchar_t) = 2}
+    p : PWideChar;
+{$endif sizeof(wchar_t)}
+  begin
+    slen:=length(s);
+    SetLength(result,slen);
+{$if sizeof(wint_t) = 4}
+    i:=1;
+    resindex:=1;
+    while (i<=slen) do
+      begin
+        nc:=utf16toutf32(s,i,len,valid);
+        inc(i,len);
+        if (valid) then
+          ConcatUTF32ToWideStr(towlower(nc),result,resindex)
+        else
+          { do nothing }
+          pwidechar(@result[i])^:=s[i];
+      end;
+    { adjust length }
+    setlength(result,resindex-1);
+{$elseif sizeof(wchar_t) = 2}
+    { avoid unique calls for each character }
+    p:=@result[1];
+    { this will fail for surrogate pairs, but that's inherent }
+    { to choosing sizeof(wint_t)=2, and nothing we can help   }
+    for i:=1 to length(s) do
+      p[i]:=WideChar(towlower(wint_t(s[i])));
+{$else}
+{$error Unsupported wchar_t size}
+{$endif}
+  end;
+
+
+function UpperWideString(const s : WideString) : WideString;
+  var
+    i, slen   : SizeInt;
+{$if sizeof(wint_t) = 4}
+    nc        : wint_t;
+    resindex  : SizeInt;
+    len       : longint;
+    valid     : boolean;
+{$elseif sizeof(wchar_t) = 2}
+    p : PWideChar;
+{$endif sizeof(wchar_t)}
+  begin
+    slen:=length(s);
+    SetLength(result,slen);
+{$if sizeof(wchar_t) = 4}
+    i:=1;
+    resindex:=1;
+    while (i<=slen) do
+      begin
+        nc:=utf16toutf32(s,i,len,valid);
+        inc(i,len);
+        if (valid) then
+          ConcatUTF32ToWideStr (towupper(nc),result,resindex)
+        else
+          { do nothing }
+          pwidechar(@result[i])^:=s[i];
+      end;
+    { adjust length }
+    setlength(result,resindex-1);
+{$elseif sizeof(wchar_t) = 2}
+    { avoid unique calls for each character }
+    p:=@result[1];
+    { this will fail for surrogate pairs, but that's inherent }
+    { to choosing sizeof(wint_t)=2, and nothing we can help   }
+    for i:=1 to length(s) do
+      p[i]:=WideChar(towupper(wint_t(s[i])));
+{$else}
+{$error Unsupported wchar_t size}
+{$endif}
+  end;
+*)
+
+
 function LowerWideString(const s : WideString) : WideString;
   var
     i : SizeInt;
@@ -273,11 +371,193 @@ function UpperWideString(const s : WideString) : WideString;
   end;
 
 
+procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
+begin
+  if (len>length(s)) then
+    if (length(s) < 10*256) then
+      setlength(s,length(s)+10)
+    else
+      setlength(s,length(s)+length(s) shr 8);
+end;
+
+
+procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
+begin
+  EnsureAnsiLen(s,index);
+  pchar(@s[index])^:=c;
+  inc(index);
+end;
+
+
+{ concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
+{$ifndef beos}
+procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
+{$else not beos}
+procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
+{$endif beos}
+var
+  p     : pchar;
+  mblen : size_t;
+begin
+  { we know that s is unique -> avoid uniquestring calls}
+  p:=@s[index];
+  if (nc<$7f) then
+    ConcatCharToAnsiStr(char(nc),s,index)
+  else
+    begin
+      { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4   }
+      { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
+      { that's a libc macro mapped to internal functions/variables  }
+      { and thus not a stable external API on systems where libc    }
+      { breaks backwards compatibility every now and then           }
+      EnsureAnsiLen(s,index+10);
+{$ifndef beos}
+      mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
+{$else not beos}
+      mblen:=wctomb(p,wchar_t(nc));
+{$endif not beos}
+      if (mblen<>size_t(-1)) then
+        inc(index,mblen)
+      else
+        begin
+          { invalid wide char }
+          p^:='?';
+          inc(index);
+        end;
+    end;
+end;
+
+
+function LowerAnsiString(const s : AnsiString) : AnsiString;
+  var
+    i, slen,
+    resindex : SizeInt;
+    mblen    : size_t;
+{$ifndef beos}
+    ombstate,
+    nmbstate : mbstate_t;
+{$endif beos}
+    wc       : wchar_t;
+  begin
+    fillchar(ombstate,sizeof(ombstate),0);
+    fillchar(nmbstate,sizeof(nmbstate),0);
+    slen:=length(s);
+    SetLength(result,slen+10);
+    i:=1;
+    resindex:=1;
+    while (i<=slen) do
+      begin
+        if (s[i]<=#127) then
+          begin
+            ConcatCharToAnsiStr(char(tolower(cint(s[i]))),result,resindex);
+            inc(i)
+          end
+        else
+          begin
+{$ifndef beos}
+            mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
+{$else not beos}
+            mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
+{$endif not beos}
+            case mblen of
+              size_t(-2):
+                begin
+                  { partial invalid character, copy literally }
+                  while (i<=slen) do
+                    begin
+                      ConcatCharToAnsiStr(s[i],result,resindex);
+                      inc(i);
+                    end;
+                end;
+              size_t(-1), 0:
+                begin
+                  { invalid or null character }
+                  ConcatCharToAnsiStr(s[i],result,resindex);
+                  inc(i);
+                end;
+              else
+                begin
+                  { a valid sequence }
+                  { even if mblen = 1, the lowercase version may have a }
+                  { different length                                     }
+                  { We can't do anything special if wchar_t is 16 bit... }
+{$ifndef beos}
+                  ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
+{$else not beos}
+                  ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
+{$endif not beos}
+                  inc(i,mblen);
+                end;
+            end;
+          end;
+      end;
+    SetLength(result,resindex-1);
+  end;
+
+
+function UpperAnsiString(const s : AnsiString) : AnsiString;
+  var
+    i, slen,
+    resindex : SizeInt;
+    mblen    : size_t;
+    ombstate,
+    nmbstate : mbstate_t;
+    wc       : wchar_t;
+  begin
+    fillchar(ombstate,sizeof(ombstate),0);
+    fillchar(nmbstate,sizeof(nmbstate),0);
+    slen:=length(s);
+    SetLength(result,slen+10);
+    i:=1;
+    resindex:=1;
+    while (i<=slen) do
+      begin
+        if (s[i]<=#127) then
+          begin
+            ConcatCharToAnsiStr(char(toupper(cint(s[i]))),result,resindex);
+            inc(i)
+          end
+        else
+          begin
+            mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
+            case mblen of
+              size_t(-2):
+                begin
+                  { partial invalid character, copy literally }
+                  while (i<=slen) do
+                    begin
+                      ConcatCharToAnsiStr(s[i],result,resindex);
+                      inc(i);
+                    end;
+                end;
+              size_t(-1), 0:
+                begin
+                  { invalid or null character }
+                  ConcatCharToAnsiStr(s[i],result,resindex);
+                  inc(i);
+                end;
+              else
+                begin
+                  { a valid sequence }
+                  { even if mblen = 1, the uppercase version may have a }
+                  { different length                                     }
+                  { We can't do anything special if wchar_t is 16 bit... }
+                  ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
+                  inc(i,mblen);
+                end;
+            end;
+          end;
+      end;
+    SetLength(result,resindex-1);
+  end;
+
+
 procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
   var
     outlength,
     outoffset,
     outleft : size_t;
+    err: cint;
     srcpos,
     destpos: pchar;
     mynil : pchar;
@@ -294,7 +574,22 @@ procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
     outleft:=outlength*4;
     while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
       begin
-        case fpgetCerrno of
+        err:=fpgetCerrno;
+        case err of
+         ESysEINVAL,
+         ESysEILSEQ:
+            begin
+              { skip and set to '?' }
+              inc(srcpos);
+              dec(len);
+              plongint(destpos)^:=longint('?');
+              inc(destpos,4);
+              dec(outleft,4);
+              { reset }
+              iconv(iconv_ansi2ucs4,@mynil,@my0,@mynil,@my0);
+              if err=ESysEINVAL then
+                break;
+            end;
           ESysE2BIG:
             begin
               outoffset:=destpos-pchar(dest);
@@ -375,9 +670,10 @@ begin
       CompareTextWideStringProc:=@CompareTextWideString;
       {
       CharLengthPCharProc
-
-      UpperAnsiStringProc
-      LowerAnsiStringProc
+      }
+      UpperAnsiStringProc:=@UpperAnsiString;
+      LowerAnsiStringProc:=@LowerAnsiString;
+      {
       CompareStrAnsiStringProc
       CompareTextAnsiStringProc
       }

+ 339 - 0
tests/test/twide6.pp

@@ -0,0 +1,339 @@
+{$codepage utf-8}
+uses
+ {$ifdef unix}
+ cwstring,
+ {$endif}
+  sysutils;
+
+
+{ normal upper case testing }
+procedure testupper;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  w1:='aé'#0'èàł'#$d87e#$dc04;
+  w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original upper: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiUpperCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=wideuppercase(w1);
+{$ifdef print}
+  writeln('wideupper: ',w1);
+  writeln('ansiupper: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(1);
+  if (w4 <> w2) then
+    halt(2);
+end;
+
+
+{ normal lower case testing }
+procedure testlower;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
+  w2:='aé'#0'èàł'#$d87e#$dc04;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original lower: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiLowerCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=widelowercase(w1);
+{$ifdef print}
+  writeln('widelower: ',w1);
+  writeln('ansilower: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(3);
+  if (w4 <> w2) then
+    halt(4);
+end;
+
+
+
+{ upper case testing with a missing utf-16 pair at the end }
+procedure testupperinvalid;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  { missing utf-16 pair at end }
+  w1:='aé'#0'èàł'#$d87e;
+  w2:='AÉ'#0'ÈÀŁ'#$d87e;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original upper: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiUpperCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=wideuppercase(w1);
+{$ifdef print}
+  writeln('wideupper: ',w1);
+  writeln('ansiupper: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(5);
+  if (w4 <> w2) then
+    halt(6);
+end;
+
+
+{ lower case testing with a missing utf-16 pair at the end }
+procedure testlowerinvalid;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  { missing utf-16 pair at end}
+  w1:='AÉ'#0'ÈÀŁ'#$d87e;
+  w2:='aé'#0'èàł'#$d87e;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original lower: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiLowerCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=widelowercase(w1);
+{$ifdef print}
+  writeln('widelower: ',w1);
+  writeln('ansilower: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(7);
+  if (w4 <> w2) then
+    halt(8);
+end;
+
+
+
+{ upper case testing with a missing utf-16 pair at the end, followed by a normal char }
+procedure testupperinvalid1;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  { missing utf-16 pair at end with char after it}
+  w1:='aé'#0'èàł'#$d87e'j';
+  w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original upper: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiUpperCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=wideuppercase(w1);
+{$ifdef print}
+  writeln('wideupper: ',w1);
+  writeln('ansiupper: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(9);
+  if (w4 <> w2) then
+    halt(10);
+end;
+
+
+{ lower case testing with a missing utf-16 pair at the end, followed by a normal char }
+procedure testlowerinvalid1;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  { missing utf-16 pair at end with char after it}
+  w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
+  w2:='aé'#0'èàł'#$d87e'j';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original lower: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiLowerCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=widelowercase(w1);
+{$ifdef print}
+  writeln('widelower: ',w1);
+  writeln('ansilower: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(11);
+  if (w4 <> w2) then
+    halt(12);
+end;
+
+
+{ upper case testing with corrupting the utf-8 string after conversion }
+procedure testupperinvalid2;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  w1:='aé'#0'èàł'#$d87e#$dc04'ö';
+  w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original upper: ',w2);
+{$endif print}
+  s:=w1;
+  { truncate the last utf-8 character }
+  setlength(s,length(s)-1);
+  w3:=s;
+  { adjust checking values for new length due to corruption }
+  if length(w3)<>length(w2) then
+    begin
+      setlength(w2,length(w3)); 
+      setlength(w1,length(w3)); 
+    end;
+  w4:=AnsiUpperCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=wideuppercase(w1);
+{$ifdef print}
+  writeln('wideupper: ',w1);
+  writeln('ansiupper: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(13);
+  if (w4 <> w2) then
+    halt(14);
+end;
+
+
+{ lower case testing with corrupting the utf-8 string after conversion }
+procedure testlowerinvalid2;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  i: longint;
+begin
+  w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
+  w2:='aé'#0'èàł'#$d87e#$dc04'ö';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original lower: ',w2);
+{$endif print}
+  s:=w1;
+  { truncate the last utf-8 character }
+  setlength(s,length(s)-1);
+  w3:=s;
+  { adjust checking values for new length due to corruption }
+  if length(w3)<>length(w2) then
+    begin
+      setlength(w2,length(w3)); 
+      setlength(w1,length(w3)); 
+    end;
+  w4:=AnsiLowerCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=widelowercase(w1);
+{$ifdef print}
+  writeln('widelower: ',w1);
+  writeln('ansilower: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    halt(15);
+  if (w4 <> w2) then
+    halt(16);
+end;
+
+
+
+begin
+  testupper;
+  writeln;
+  testlower;
+  writeln;
+  writeln;
+  testupperinvalid;
+  writeln;
+  testlowerinvalid;
+  writeln;
+  writeln;
+  testupperinvalid1;
+  writeln;
+  testlowerinvalid1;
+  writeln;
+  writeln;
+  testupperinvalid2;
+  writeln;
+  testlowerinvalid2;
+end.