Explorar o código

+ widestringmanager.codepointlengthproc added, which can be used to
determine the length of a multi-byte character. The return values
are defined to be the same as those of POSIX' mblen: -1 =
invalid/incomplete sequence, 0 = #0, > 0 = length of sequence in
bytes.
+ default implementation for widestringmanager.codepointlengthproc
(assumes all code points have length 1) and Unix implementation
(based on mb(r)len); Windows implementation is still required
* replaced default implementation of
widestringmanager.CharLengthPCharProc with strlen() of the input
instead of an error (correct if all code points have length 1,
still needs Windows implementation)
+ implemented fpc_text_read_{wide,unicode}str() and
fpc_text_read_widechar() (mantis #18163); fpc_text_read_widechar()
uses the new widestringmanager.codepointlengthproc()
+ unicodestring support for readstr/writestr
* fixed declaration of fpc_Write_Text_UnicodeStr (unicodestring
instead of widestring parameter)
* extended test/twide*.pp tests to test the new/fixed functionality

git-svn-id: trunk@16533 -

Jonas Maebe %!s(int64=14) %!d(string=hai) anos
pai
achega
f4c31ecf3c

+ 18 - 3
rtl/inc/compproc.inc

@@ -471,7 +471,7 @@ Procedure fpc_Write_Text_AnsiStr (Len : Longint; Var f : Text; const S : AnsiStr
 Procedure fpc_Write_Text_WideStr (Len : Longint; Var f : Text; const S : WideString); compilerproc;
 {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
 {$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
-Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : WideString); compilerproc;
+Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : UnicodeString); compilerproc;
 {$endif FPC_HAS_FEATURE_WIDESTRINGS}
 Procedure fpc_Write_Text_SInt(Len : Longint;var t : Text;l : ValSInt); compilerproc;
 Procedure fpc_Write_Text_UInt(Len : Longint;var t : Text;l : ValUInt); compilerproc;
@@ -504,16 +504,22 @@ function fpc_SetupWriteStr_Shortstr(out s: shortstring): PText; compilerproc;
 function fpc_SetupWriteStr_Ansistr(out s: ansistring): PText; compilerproc;
 {$endif FPC_HAS_FEATURE_ANSISTRINGS}
 {$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
-function fpc_SetupWriteStr_Widestr(out s: widestring): PText; compilerproc;
+function fpc_SetupWriteStr_Unicodestr(out s: unicodestring): PText; compilerproc;
 {$endif FPC_HAS_FEATURE_WIDESTRINGS}
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
+function fpc_SetupWriteStr_Widestr(out s: widestring): PText; compilerproc;
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
 
 function fpc_SetupReadStr_Shortstr(const s: shortstring): PText; compilerproc;
 {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
 function fpc_SetupReadStr_Ansistr(const s: ansistring): PText; compilerproc;
 {$endif FPC_HAS_FEATURE_ANSISTRINGS}
 {$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
-function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc;
+function fpc_SetupReadStr_Unicodestr(const s: unicodestring): PText; compilerproc;
 {$endif FPC_HAS_FEATURE_WIDESTRINGS}
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
+function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc;
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
 {$endif FPC_HAS_FEATURE_TEXTIO}
 
 {$ifdef FPC_HAS_FEATURE_VARIANTS}
@@ -541,7 +547,16 @@ Procedure fpc_Read_Text_PChar_As_Array(var f : Text;out s : array of char; zerob
 {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
 Procedure fpc_Read_Text_AnsiStr(var f : Text;out s : AnsiString); compilerproc;
 {$endif FPC_HAS_FEATURE_ANSISTRINGS}
+{$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+Procedure fpc_Read_Text_UnicodeStr(var f : Text;out us : UnicodeString); compilerproc;
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
+Procedure fpc_Read_Text_WideStr(var f : Text;out ws : WideString); compilerproc;
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
 Procedure fpc_Read_Text_Char(var f : Text; out c : char); compilerproc;
+{$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+procedure fpc_Read_Text_WideChar(var f : Text; out wc: widechar); compilerproc;
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
 Procedure fpc_Read_Text_Char_Iso(var f : Text; out c : char); compilerproc;
 Procedure fpc_Read_Text_SInt(var f : Text; out l :ValSInt); compilerproc;
 Procedure fpc_Read_Text_UInt(var f : Text; out u :ValUInt); compilerproc;

+ 117 - 9
rtl/inc/text.inc

@@ -689,8 +689,8 @@ begin
 end;
 
 
-{$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
-Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : WideString); iocheck; compilerproc;
+{$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : UnicodeString); iocheck; compilerproc;
 {
  Writes a UnicodeString to the Text file T
 }
@@ -714,7 +714,7 @@ begin
     else InOutRes:=103;
   end;
 end;
-{$endif FPC_HAS_FEATURE_ANSISTRINGS}
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
 
 
 {$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
@@ -1288,7 +1288,7 @@ End;
 
 
 {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
-Procedure fpc_Read_Text_AnsiStr(var f : Text;out s : AnsiString); iocheck; compilerproc;
+Procedure fpc_Read_Text_AnsiStr(var f : Text;out s : AnsiString); [public, alias: 'FPC_READ_TEXT_ANSISTR']; iocheck; compilerproc;
 var
   slen,len : SizeInt;
 Begin
@@ -1302,10 +1302,36 @@ Begin
   // Set actual length
   SetLength(S,Slen);
 End;
+
+Procedure fpc_Read_Text_AnsiStr_Intern(var f : Text;out s : AnsiString); [external name 'FPC_READ_TEXT_ANSISTR'];
 {$endif FPC_HAS_FEATURE_ANSISTRINGS}
 
 
-procedure fpc_Read_Text_Char(var f : Text; out c: char); iocheck;compilerproc;
+{$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+Procedure fpc_Read_Text_UnicodeStr(var f : Text;out us : UnicodeString); iocheck; compilerproc;
+var
+  s: AnsiString;
+Begin
+  // all standard input is assumed to be ansi-encoded
+  fpc_Read_Text_AnsiStr_Intern(f,s);
+  // Convert to unicodestring
+  us:=s;
+End;
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
+
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
+Procedure fpc_Read_Text_WideStr(var f : Text;out ws : WideString); iocheck; compilerproc;
+var
+  s: AnsiString;
+Begin
+  // all standard input is assumed to be ansi-encoded
+  fpc_Read_Text_AnsiStr_Intern(f,s);
+  // Convert to widestring
+  ws:=s;
+End;
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
+
+procedure fpc_Read_Text_Char(var f : Text; out c: char); [public, alias: 'FPC_READ_TEXT_CHAR']; iocheck;compilerproc;
 Begin
   c:=#0;
   If not CheckRead(f) then
@@ -1319,6 +1345,49 @@ Begin
   inc(TextRec(f).BufPos);
 end;
 
+procedure fpc_Read_Text_Char_intern(var f : Text; out c: char); iocheck; [external name 'FPC_READ_TEXT_CHAR'];
+
+
+{$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+procedure fpc_Read_Text_WideChar(var f : Text; out wc: widechar); iocheck;compilerproc;
+var
+  ws: widestring;
+  i: longint;
+  { maximum code point length is 6 characters (with UTF-8) }
+  str: array[0..5] of char;
+Begin
+  fillchar(str[0],sizeof(str),0);
+  for i:=low(str) to high(str) do
+    begin
+      fpc_Read_Text_Char_intern(f,str[i]);
+      case widestringmanager.CodePointLengthProc(@str[0],i+1) of
+        -1: { possibly incomplete code point, try with an extra character }
+           ;
+        0: { null character }
+          begin
+            wc:=#0;
+            exit;
+          end;
+        else
+          begin
+            { valid code point -> convert to widestring}
+            widestringmanager.Ansi2WideMoveProc(@str[0],ws,i+1);
+            { has to be exactly one widechar }
+            if length(ws)=1 then
+              begin
+                wc:=ws[1];
+                exit
+              end
+            else
+              break;
+          end;
+      end;
+    end;
+  { invalid widechar input }
+  inoutres:=106;
+end;
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
+
 
 procedure fpc_Read_Text_Char_Iso(var f : Text; out c: char); iocheck;compilerproc;
 Begin
@@ -1604,6 +1673,22 @@ end;
 
 
 {$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+procedure WriteStrUnicode(var t: textrec);
+var
+  temp: ansistring;
+  str: punicodestring;
+begin
+  if (t.bufpos=0) then
+    exit;
+  str:=punicodestring(ppointer(@t.userdata[StrPtrIndex])^);
+  setlength(temp,t.bufpos);
+  move(t.bufptr^,temp[1],t.bufpos);
+  str^:=str^+temp;
+  t.bufpos:=0;
+end;
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
+
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
 procedure WriteStrWide(var t: textrec);
 var
   temp: ansistring;
@@ -1617,8 +1702,7 @@ begin
   str^:=str^+temp;
   t.bufpos:=0;
 end;
-{$endif FPC_HAS_FEATURE_WIDESTRINGS}
-
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
 
 procedure SetupWriteStrCommon(out t: textrec);
 begin
@@ -1657,6 +1741,20 @@ end;
 
 
 {$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
+function fpc_SetupWriteStr_Unicodestr(out s: unicodestring): PText; compilerproc;
+begin
+  setupwritestrcommon(ReadWriteStrText);
+  PPointer(@ReadWriteStrText.userdata[StrPtrIndex])^:=@s;
+// automatically done by out-semantics
+//  setlength(s,0);
+  ReadWriteStrText.InOutFunc:=@WriteStrUnicode;
+  ReadWriteStrText.FlushFunc:=@WriteStrUnicode;
+  result:=@ReadWriteStrText;
+end;
+{$endif FPC_HAS_FEATURE_WIDESTRINGS}
+
+
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
 function fpc_SetupWriteStr_Widestr(out s: widestring): PText; compilerproc;
 begin
   setupwritestrcommon(ReadWriteStrText);
@@ -1667,7 +1765,7 @@ begin
   ReadWriteStrText.FlushFunc:=@WriteStrWide;
   result:=@ReadWriteStrText;
 end;
-{$endif FPC_HAS_FEATURE_WIDESTRINGS}
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
 
 
 procedure ReadAnsiStrFinal(var t: textrec);
@@ -1763,7 +1861,7 @@ end;
 
 
 {$ifdef FPC_HAS_FEATURE_WIDESTRINGS}
-function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc;
+function fpc_SetupReadStr_Unicodestr(const s: unicodestring): PText; compilerproc;
 begin
   { we use an ansistring to avoid code duplication, and let the    }
   { assignment convert the widestring to an equivalent ansistring  }
@@ -1772,6 +1870,16 @@ end;
 {$endif FPC_HAS_FEATURE_WIDESTRINGS}
 
 
+{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
+function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc;
+begin
+  { we use an ansistring to avoid code duplication, and let the    }
+  { assignment convert the widestring to an equivalent ansistring  }
+  result:=fpc_SetupReadStr_Ansistr_Intern(s);
+end;
+{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
+
+
 {*****************************************************************************
                                Initializing
 *****************************************************************************}

+ 9 - 0
rtl/inc/ustringh.inc

@@ -67,7 +67,16 @@ Type
 }
     CompareWideStringProc : function(const s1, s2 : WideString) : PtrInt;
     CompareTextWideStringProc : function(const s1, s2 : WideString): PtrInt;
+    { return value: number of code points in the string. Whenever an invalid
+      code point is encountered, all characters part of this invalid code point
+      are considered to form one "character" and the next character is
+      considered to be the start of a new (possibly also invalid) code point }
     CharLengthPCharProc : function(const Str: PChar): PtrInt;
+    { return value:
+      -1 if incomplete or invalid code point
+      0 if NULL character,
+      > 0 if that's the length in bytes of the code point }
+    CodePointLengthProc : function(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
 
     UpperAnsiStringProc : function(const s : ansistring) : ansistring;
     LowerAnsiStringProc : function(const s : ansistring) : ansistring;

+ 17 - 8
rtl/inc/ustrings.inc

@@ -88,6 +88,21 @@ begin
 end;
 
 
+function DefaultCharLengthPChar(const Str: PChar): PtrInt;
+  begin
+    DefaultCharLengthPChar:=length(Str);
+  end;
+
+
+function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
+  begin
+    if str[0]<>#0 then
+      DefaultCodePointLength:=1
+    else
+      DefaultCodePointLength:=0;
+  end;
+
+
 Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
 begin
   manager:=widestringmanager;
@@ -2506,13 +2521,6 @@ function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
   begin
     unimplementedunicodestring;
   end;
-
-
-function CharLengthPChar(const Str: PChar): PtrInt;
-  begin
-    unimplementedunicodestring;
-  end;
-
 {$warnings on}
 
 procedure initunicodestringmanager;
@@ -2535,7 +2543,8 @@ procedure initunicodestringmanager;
 {$endif HAS_WIDESTRINGMANAGER}
     widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
     widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
-    widestringmanager.CharLengthPCharProc:=@CharLengthPChar;
+    widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
+    widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
 {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
   end;
 

+ 9 - 0
rtl/win/sysutils.pp

@@ -1357,7 +1357,16 @@ function Win32CompareTextUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
   are relevant already for the system unit }
 procedure InitWin32Widestrings;
   begin
+    { return value: number of code points in the string. Whenever an invalid
+      code point is encountered, all characters part of this invalid code point
+      are considered to form one "character" and the next character is
+      considered to be the start of a new (possibly also invalid) code point }
 //!!!    CharLengthPCharProc : function(const Str: PChar): PtrInt;
+    { return value:
+      -1 if incomplete or invalid code point
+      0 if NULL character,
+      > 0 if that's the length in bytes of the code point }
+//!!!!    CodePointLengthProc : function(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
     widestringmanager.CompareWideStringProc:=@Win32CompareWideString;
     widestringmanager.CompareTextWideStringProc:=@Win32CompareTextWideString;
     widestringmanager.UpperAnsiStringProc:=@Win32AnsiUpperCase;

+ 11 - 1
tests/test/twide1.pp

@@ -5,6 +5,7 @@ uses
   
 var
   w : widestring;
+  u : unicodestring;
   a : ansistring;
   
 begin
@@ -14,6 +15,15 @@ begin
     halt(1);
   a:=w;
   if a[1]<>'A' then
-    halt(1);
+    halt(2);
+  writeln('ok');
+
+  a:='A';
+  u:=a;
+  if u[1]<>#65 then
+    halt(3);
+  a:=u;
+  if a[1]<>'A' then
+    halt(4);
   writeln('ok');
 end.

+ 9 - 0
tests/test/twide2.pp

@@ -6,6 +6,7 @@ uses
 var
   i : longint;
   w,w2 : widestring;
+  u,u2 : unicodestring;
   a : ansistring;
   
 begin
@@ -17,4 +18,12 @@ begin
       a:=w;
       w2:=a;
     end;
+  setlength(u,1000);
+  for i:=1 to 1000 do
+    u[i]:=widechar(i);
+  for i:=1 to 10 do
+    begin
+      a:=u;
+      u2:=a;
+    end;
 end.

+ 46 - 2
tests/test/twide3.pp

@@ -5,32 +5,76 @@
 {$codepage utf-8}
 
 {$mode objfpc}
+
 uses
 {$ifdef unix}
   cwstring,
 {$endif}
-  sysutils;
+  SysUtils;
 
 {$i+}
 
 var
   t: text;
   w: widestring;
+  u: unicodestring;
   a: ansistring;
+  wc: widechar;
 
 begin
   assign(t,'twide3.txt');
   rewrite(t);
   writeln(t,'łóżka');
   close(t);
+  reset(t);
+  
+  try
+    read(t,wc);
+    if wc<>'ł' then
+      raise Exception.create('wrong widechar read: '+inttostr(ord(wc))+'<>'+inttostr(ord('ł')));
+  except
+    close(t);
+//    erase(t);
+    raise;
+  end;
+    
   reset(t);
   try
     readln(t,a);
     w:=a;
     if (w<>'łóżka') then
-      raise Exception.create('wrong string read');
+      raise Exception.create('wrong ansistring read');
+  except
+    close(t);
+    erase(t);
+    raise;
+  end;
+
+  reset(t);
+  try
+    readln(t,w);
+    if (w<>'łóżka') then
+      raise Exception.create('wrong widestring read');
+  except
+    close(t);
+    erase(t);
+    raise;
+  end;
+
+  reset(t);
+  try
+    readln(t,u);
+    if (u<>'łóżka') then
+      raise Exception.create('wrong unicodestring read');
   finally
     close(t);
     erase(t);
   end;
+
+  readstr(u,a);
+  if u<>a then
+    raise Exception.create('wrong readstr(u,a)');
+  readstr(w,a);
+  if w<>u then
+    raise Exception.create('wrong readstr(w,a)');
 end.

+ 12 - 0
tests/test/twide5.pp

@@ -2,6 +2,7 @@
 
 var
   ws: widestring;
+  uns: unicodestring;
   us: UCS4String;
 begin
 // the compiler does not yet support characters which require
@@ -42,4 +43,15 @@ begin
      (ws[7]<>#$d87e) or
      (ws[8]<>#$dc04) then
     halt(3);
+  uns:='鳣ćçŹ'#$d87e#$dc04;
+  if (length(uns)<>8) or
+     (uns[1]<>'é') or
+     (uns[2]<>'ł') or
+     (uns[3]<>'Ł') or
+     (uns[4]<>'ć') or
+     (uns[5]<>'ç') or
+     (uns[6]<>'Ź') or
+     (uns[7]<>#$d87e) or
+     (uns[8]<>#$dc04) then
+    halt(4);
 end.

+ 127 - 2
tests/test/twide6.pp

@@ -13,11 +13,12 @@ procedure doerror(i : integer);
   end;
 
 
-{ normal upper case testing }
+{ normal upper case testing (widestring) }
 procedure testupper;
 var
   s: ansistring;
   w1,w2,w3,w4: widestring;
+  u1,u2,u3,u4: unicodestring;
   i: longint;
 begin
   w1:='aé'#0'èàł'#$d87e#$dc04;
@@ -72,11 +73,74 @@ begin
     doerror(21);
   if (w4 <> w2) then
     doerror(22);
+end;
+
 
+{ normal upper case testing (unicodestring) }
+procedure testupperu;
+var
+  s: ansistring;
+  w1,w2,w3,w4: widestring;
+  u1,u2,u3,u4: unicodestring;
+  i: longint;
+begin
+  w1:='aé'#0'èàł'#$d87e#$dc04;
+  w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original upper: ',w2);
+{$endif print}
+  s:=w1;
+{$ifdef print}
+  writeln('ansi: ',s);
+{$endif print}
+  w3:=s;
+  w4:=AnsiUpperCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=unicodeuppercase(w1);
+{$ifdef print}
+  writeln('wideupper: ',w1);
+  writeln('original upper: ',w2);
+  writeln('ansiupper: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    doerror(1);
+  if (w4 <> w2) then
+    doerror(2);
+
+  w1:='aéèàł'#$d87e#$dc04;
+  w2:='AÉÈÀŁ'#$d87e#$dc04;
+  s:=w1;
+  w3:=s;
+  w4:=AnsiStrUpper(pchar(s));
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=unicodeuppercase(w1);
+{$ifdef print}
+  writeln('unicodeupper: ',w1);
+  writeln('ansistrupper: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    doerror(21);
+  if (w4 <> w2) then
+    doerror(22);
 end;
 
 
-{ normal lower case testing }
+
+{ normal lower case testing (widestring) }
 procedure testlower;
 var
   s: ansistring;
@@ -135,6 +199,63 @@ begin
 end;
 
 
+{ normal lower case testing (unicodestring) }
+procedure testloweru;
+var
+  s: ansistring;
+  w1,w2,w3,w4: unicodestring;
+  i: longint;
+begin
+  w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
+  w2:='aé'#0'èàł'#$d87e#$dc04;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+  writeln('original: ',w1);
+  writeln('original lower: ',w2);
+{$endif print}
+  s:=w1;
+  w3:=s;
+  w4:=AnsiLowerCase(s);
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=unicodelowercase(w1);
+{$ifdef print}
+  writeln('unicodelower: ',w1);
+  writeln('ansilower: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    doerror(3);
+  if (w4 <> w2) then
+    doerror(4);
+
+
+  w1:='AÉÈÀŁ'#$d87e#$dc04;
+  w2:='aéèàł'#$d87e#$dc04;
+  s:=w1;
+  w3:=s;
+  w4:=AnsiStrLower(pchar(s));
+  { filter out unsupported characters }
+  for i:=1 to length(w3) do
+    if w3[i]='?' then
+      begin
+        w2[i]:='?';
+        w1[i]:='?';
+      end;
+  w1:=unicodelowercase(w1);
+{$ifdef print}
+  writeln('unicodelower: ',w1);
+  writeln('ansistrlower: ',w4);
+{$endif print}
+  if (w1 <> w2) then
+    doerror(3);
+  if (w4 <> w2) then
+    doerror(4);
+end;
 
 { upper case testing with a missing utf-16 pair at the end }
 procedure testupperinvalid;
@@ -377,8 +498,12 @@ end;
 begin
   testupper;
   writeln;
+  testupperu;
+  writeln;
   testlower;
   writeln;
+  testloweru;
+  writeln;
   writeln;
   testupperinvalid;
   writeln;