17 years ago · cc4f9e5643
--- a/.gitattributes
+++ b/.gitattributes
@@ -106,6 +106,7 @@ compiler/avr/rgcpu.pas svneol=native#text/plain
 
															 compiler/browcol.pas svneol=native#text/plain
														
 
															 compiler/bsdcompile -text
														
 
															 compiler/catch.pas svneol=native#text/plain
														
 
															+compiler/ccharset.pas svneol=native#text/plain
														
 
															 compiler/cclasses.pas svneol=native#text/plain
														
 
															 compiler/cfidwarf.pas svneol=native#text/plain
														
 
															 compiler/cfileutl.pas svneol=native#text/plain
														
@@ -7838,6 +7839,8 @@ tests/test/tunistr1.pp svneol=native#text/plain
 
															 tests/test/tunistr2.pp svneol=native#text/plain
														
 
															 tests/test/tunistr4.pp svneol=native#text/plain
														
 
															 tests/test/tunistr5.pp svneol=native#text/plain
														
 
															+tests/test/tunistr6.pp svneol=native#text/plain
														
 
															+tests/test/tunistr7.pp svneol=native#text/plain
														
 
															 tests/test/tunit1.pp svneol=native#text/plain
														
 
															 tests/test/tunit2.pp svneol=native#text/plain
														
 
															 tests/test/tunit3.pp svneol=native#text/plain
														
--- a/compiler/ccharset.pas
+++ b/compiler/ccharset.pas
@@ -0,0 +1,254 @@
 
															+{
														
 
															+    This file is part of the Free Pascal run time library.
														
 
															+    Copyright (c) 2000 by Florian Klaempfl
														
 
															+    member of the Free Pascal development team.
														
 
															+
														
 
															+    This unit implements several classes for charset conversions
														
 
															+
														
 
															+    See the file COPYING.FPC, included in this distribution,
														
 
															+    for details about the copyright.
														
 
															+
														
 
															+    This program is distributed in the hope that it will be useful,
														
 
															+    but WITHOUT ANY WARRANTY; without even the implied warranty of
														
 
															+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
														
 
															+
														
 
															+ **********************************************************************}
														
 
															+
														
 
															+{ this unit is included temporarily for 2.2 bootstrapping and can be
														
 
															+  removed after the next release after 2.2.2 }
														
 
															+{$mode objfpc}
														
 
															+unit ccharset;
														
 
															+
														
 
															+  interface
														
 
															+
														
 
															+    type
														
 
															+       tunicodechar = word;
														
 
															+       tunicodestring = ^tunicodechar;
														
 
															+
														
 
															+       tcsconvert = class
														
 
															+         // !!!!!!1constructor create;
														
 
															+       end;
														
 
															+
														
 
															+       tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
														
 
															+         umf_unused);
														
 
															+
														
 
															+       punicodecharmapping = ^tunicodecharmapping;
														
 
															+       tunicodecharmapping = record
														
 
															+          unicode : tunicodechar;
														
 
															+          flag : tunicodecharmappingflag;
														
 
															+          reserved : byte;
														
 
															+       end;
														
 
															+
														
 
															+       punicodemap = ^tunicodemap;
														
 
															+       tunicodemap = record
														
 
															+          cpname : string[20];
														
 
															+          map : punicodecharmapping;
														
 
															+          lastchar : longint;
														
 
															+          next : punicodemap;
														
 
															+          internalmap : boolean;
														
 
															+       end;
														
 
															+
														
 
															+       tcp2unicode = class(tcsconvert)
														
 
															+       end;
														
 
															+
														
 
															+    function loadunicodemapping(const cpname,f : string) : punicodemap;
														
 
															+    procedure registermapping(p : punicodemap);
														
 
															+    function getmap(const s : string) : punicodemap;
														
 
															+    function mappingavailable(const s : string) : boolean;
														
 
															+    function getunicode(c : char;p : punicodemap) : tunicodechar;
														
 
															+    function getascii(c : tunicodechar;p : punicodemap) : string;
														
 
															+
														
 
															+  implementation
														
 
															+
														
 
															+    var
														
 
															+       mappings : punicodemap;
														
 
															+
														
 
															+    function loadunicodemapping(const cpname,f : string) : punicodemap;
														
 
															+
														
 
															+      var
														
 
															+         data : punicodecharmapping;
														
 
															+         datasize : longint;
														
 
															+         t : text;
														
 
															+         s,hs : string;
														
 
															+         scanpos,charpos,unicodevalue : longint;
														
 
															+         code : word;
														
 
															+         flag : tunicodecharmappingflag;
														
 
															+         p : punicodemap;
														
 
															+         lastchar : longint;
														
 
															+
														
 
															+      begin
														
 
															+         lastchar:=-1;
														
 
															+         loadunicodemapping:=nil;
														
 
															+         datasize:=256;
														
 
															+         getmem(data,sizeof(tunicodecharmapping)*datasize);
														
 
															+         assign(t,f);
														
 
															+         {$I-}
														
 
															+         reset(t);
														
 
															+         {$I+}
														
 
															+         if ioresult<>0 then
														
 
															+           begin
														
 
															+              freemem(data,sizeof(tunicodecharmapping)*datasize);
														
 
															+              exit;
														
 
															+           end;
														
 
															+         while not(eof(t)) do
														
 
															+           begin
														
 
															+              readln(t,s);
														
 
															+              if (s[1]='0') and (s[2]='x') then
														
 
															+                begin
														
 
															+                   flag:=umf_unused;
														
 
															+                   scanpos:=3;
														
 
															+                   hs:='$';
														
 
															+                   while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
														
 
															+                     begin
														
 
															+                        hs:=hs+s[scanpos];
														
 
															+                        inc(scanpos);
														
 
															+                     end;
														
 
															+                   val(hs,charpos,code);
														
 
															+                   if code<>0 then
														
 
															+                     begin
														
 
															+                        freemem(data,sizeof(tunicodecharmapping)*datasize);
														
 
															+                        close(t);
														
 
															+                        exit;
														
 
															+                     end;
														
 
															+                   while not(s[scanpos] in ['0','#']) do
														
 
															+                     inc(scanpos);
														
 
															+                   if s[scanpos]='#' then
														
 
															+                     begin
														
 
															+                        { special char }
														
 
															+                        unicodevalue:=$ffff;
														
 
															+                        hs:=copy(s,scanpos,length(s)-scanpos+1);
														
 
															+                        if hs='#DBCS LEAD BYTE' then
														
 
															+                          flag:=umf_leadbyte;
														
 
															+                     end
														
 
															+                   else
														
 
															+                     begin
														
 
															+                        { C hex prefix }
														
 
															+                        inc(scanpos,2);
														
 
															+                        hs:='$';
														
 
															+                        while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
														
 
															+                          begin
														
 
															+                             hs:=hs+s[scanpos];
														
 
															+                             inc(scanpos);
														
 
															+                          end;
														
 
															+                        val(hs,unicodevalue,code);
														
 
															+                        if code<>0 then
														
 
															+                          begin
														
 
															+                             freemem(data,sizeof(tunicodecharmapping)*datasize);
														
 
															+                             close(t);
														
 
															+                             exit;
														
 
															+                          end;
														
 
															+                        if charpos>datasize then
														
 
															+                          begin
														
 
															+                             { allocate 1024 bytes more because         }
														
 
															+                             { if we need more than 256 entries it's    }
														
 
															+                             { probably a mbcs with a lot of            }
														
 
															+                             { entries                                  }
														
 
															+                             datasize:=charpos+1024;
														
 
															+                             reallocmem(data,sizeof(tunicodecharmapping)*datasize);
														
 
															+                          end;
														
 
															+                        flag:=umf_noinfo;
														
 
															+                     end;
														
 
															+                   data[charpos].flag:=flag;
														
 
															+                   data[charpos].unicode:=unicodevalue;
														
 
															+                   if charpos>lastchar then
														
 
															+                     lastchar:=charpos;
														
 
															+                end;
														
 
															+           end;
														
 
															+         close(t);
														
 
															+         new(p);
														
 
															+         p^.lastchar:=lastchar;
														
 
															+         p^.cpname:=cpname;
														
 
															+         p^.internalmap:=false;
														
 
															+         p^.next:=nil;
														
 
															+         p^.map:=data;
														
 
															+         loadunicodemapping:=p;
														
 
															+      end;
														
 
															+
														
 
															+    procedure registermapping(p : punicodemap);
														
 
															+
														
 
															+      begin
														
 
															+         p^.next:=mappings;
														
 
															+         mappings:=p;
														
 
															+      end;
														
 
															+
														
 
															+    function getmap(const s : string) : punicodemap;
														
 
															+
														
 
															+      var
														
 
															+         hp : punicodemap;
														
 
															+
														
 
															+      const
														
 
															+         mapcache : string = '';
														
 
															+         mapcachep : punicodemap = nil;
														
 
															+
														
 
															+      begin
														
 
															+         if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
														
 
															+           begin
														
 
															+              getmap:=mapcachep;
														
 
															+              exit;
														
 
															+           end;
														
 
															+         hp:=mappings;
														
 
															+         while assigned(hp) do
														
 
															+           begin
														
 
															+              if hp^.cpname=s then
														
 
															+                begin
														
 
															+                   getmap:=hp;
														
 
															+                   mapcache:=s;
														
 
															+                   mapcachep:=hp;
														
 
															+                   exit;
														
 
															+                end;
														
 
															+              hp:=hp^.next;
														
 
															+           end;
														
 
															+         getmap:=nil;
														
 
															+      end;
														
 
															+
														
 
															+    function mappingavailable(const s : string) : boolean;
														
 
															+
														
 
															+      begin
														
 
															+         mappingavailable:=getmap(s)<>nil;
														
 
															+      end;
														
 
															+
														
 
															+    function getunicode(c : char;p : punicodemap) : tunicodechar;
														
 
															+
														
 
															+      begin
														
 
															+         if ord(c)<=p^.lastchar then
														
 
															+           getunicode:=p^.map[ord(c)].unicode
														
 
															+         else
														
 
															+           getunicode:=0;
														
 
															+      end;
														
 
															+
														
 
															+    function getascii(c : tunicodechar;p : punicodemap) : string;
														
 
															+
														
 
															+      var
														
 
															+         i : longint;
														
 
															+
														
 
															+      begin
														
 
															+         { at least map to space }
														
 
															+         getascii:=#32;
														
 
															+         for i:=0 to p^.lastchar do
														
 
															+           if p^.map[i].unicode=c then
														
 
															+             begin
														
 
															+                if i<256 then
														
 
															+                  getascii:=chr(i)
														
 
															+                else
														
 
															+                  getascii:=chr(i div 256)+chr(i mod 256);
														
 
															+                exit;
														
 
															+             end;
														
 
															+      end;
														
 
															+
														
 
															+  var
														
 
															+     hp : punicodemap;
														
 
															+
														
 
															+initialization
														
 
															+  mappings:=nil;
														
 
															+finalization
														
 
															+  while assigned(mappings) do
														
 
															+    begin
														
 
															+       hp:=mappings^.next;
														
 
															+       if not(mappings^.internalmap) then
														
 
															+         begin
														
 
															+            freemem(mappings^.map);
														
 
															+            dispose(mappings);
														
 
															+         end;
														
 
															+       mappings:=hp;
														
 
															+    end;
														
 
															+end.
														
--- a/compiler/cp1251.pas
+++ b/compiler/cp1251.pas
@@ -6,7 +6,7 @@ unit cp1251;
 
															   implementation
														
 
															   uses
														
 
															-     charset;
														
 
															+     {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
														
 
															   const
														
 
															      map : array[0..255] of tunicodecharmapping = (
														
--- a/compiler/cp437.pas
+++ b/compiler/cp437.pas
@@ -6,7 +6,7 @@ unit cp437;
 
															   implementation
														
 
															   uses
														
 
															-     charset;
														
 
															+     {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
														
 
															   const
														
 
															      map : array[0..255] of tunicodecharmapping = (
														
--- a/compiler/cp850.pas
+++ b/compiler/cp850.pas
@@ -6,7 +6,7 @@ unit cp850;
 
															   implementation
														
 
															   uses
														
 
															-     charset;
														
 
															+     {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
														
 
															   const
														
 
															      map : array[0..255] of tunicodecharmapping = (
														
--- a/compiler/cp866.pas
+++ b/compiler/cp866.pas
@@ -6,7 +6,7 @@ unit cp866;
 
															   implementation
														
 
															   uses
														
 
															-     charset;
														
 
															+     {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
														
 
															   const
														
 
															      map : array[0..255] of tunicodecharmapping = (
														
--- a/compiler/cp8859_1.pas
+++ b/compiler/cp8859_1.pas
@@ -6,7 +6,7 @@ unit cp8859_1;
 
															   implementation
														
 
															   uses
														
 
															-     charset;
														
 
															+     {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
														
 
															   const
														
 
															      map : array[0..255] of tunicodecharmapping = (
														
--- a/compiler/cp8859_5.pas
+++ b/compiler/cp8859_5.pas
@@ -6,7 +6,7 @@ unit cp8859_5;
 
															   implementation
														
 
															   uses
														
 
															-     charset;
														
 
															+     {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
														
 
															   const
														
 
															      map : array[0..255] of tunicodecharmapping = (
														
--- a/compiler/ncgcon.pas
+++ b/compiler/ncgcon.pas
@@ -270,7 +270,7 @@ implementation
 
															          pooltype: TConstPoolType;
														
 
															          pool: THashSet;
														
 
															          entry: PHashSetItem;
														
 
															-         
														
 
															+
														
 
															       const
														
 
															         PoolMap: array[tconststringtype] of TConstPoolType = (
														
 
															           sp_conststr,
														
@@ -295,7 +295,7 @@ implementation
 
															               if current_asmdata.ConstPools[pooltype] = nil then
														
 
															                 current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False);
														
 
															               pool := current_asmdata.ConstPools[pooltype];
														
 
															-           
														
 
															+
														
 
															               if cst_type in [cst_widestring, cst_unicodestring] then
														
 
															                 entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size)
														
 
															               else
														
@@ -402,7 +402,7 @@ implementation
 
															                    end;
														
 
															                 end;
														
 
															            end;
														
 
															-         if cst_type in [cst_ansistring, cst_widestring] then
														
 
															+         if cst_type in [cst_ansistring, cst_widestring, cst_unicodestring] then
														
 
															            begin
														
 
															              location_reset(location, LOC_REGISTER, OS_ADDR);
														
 
															              reference_reset_symbol(href, lab_str, 0);
														
@@ -412,8 +412,8 @@ implementation
 
															          else
														
 
															            begin
														
 
															              location_reset(location, LOC_CREFERENCE, def_cgsize(resultdef));
														
 
															-         location.reference.symbol:=lab_str;
														
 
															-      end;
														
 
															+             location.reference.symbol:=lab_str;
														
 
															+           end;
														
 
															       end;
														
--- a/compiler/options.pas
+++ b/compiler/options.pas
@@ -71,7 +71,7 @@ implementation
 
															 uses
														
 
															   widestr,
														
 
															-  charset,
														
 
															+  {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2},
														
 
															   SysUtils,
														
 
															   version,
														
 
															   cutils,cmsgs,
														
--- a/compiler/widestr.pas
+++ b/compiler/widestr.pas
@@ -28,8 +28,7 @@ unit widestr;
 
															   interface
														
 
															     uses
														
 
															-       charset,globtype
														
 
															-       ;
														
 
															+       {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2},globtype;
														
 
															     type
														
--- a/rtl/win32/system.pp
+++ b/rtl/win32/system.pp
@@ -899,10 +899,6 @@ end;
 
															 {$endif Set_i386_Exception_handler}
														
 
															-{****************************************************************************
														
 
															-                      OS dependend widestrings
														
 
															-****************************************************************************}
														
 
															-
														
 
															 const
														
 
															   { MultiByteToWideChar  }
														
 
															      MB_PRECOMPOSED = 1;
														
@@ -918,6 +914,9 @@ function CharUpperBuff(lpsz:LPWSTR; cchLength:DWORD):DWORD;
 
															 function CharLowerBuff(lpsz:LPWSTR; cchLength:DWORD):DWORD;
														
 
															     stdcall; external 'user32' name 'CharLowerBuffW';
														
 
															+{******************************************************************************
														
 
															+                              Widestring
														
 
															+ ******************************************************************************}
														
 
															 procedure Win32Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
														
 
															   var
														
@@ -947,13 +946,57 @@ procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
 
															 function Win32WideUpper(const s : WideString) : WideString;
														
 
															   begin
														
 
															     result:=s;
														
 
															-    UniqueString(result);
														
 
															     if length(result)>0 then
														
 
															       CharUpperBuff(LPWSTR(result),length(result));
														
 
															   end;
														
 
															 function Win32WideLower(const s : WideString) : WideString;
														
 
															+  begin
														
 
															+    result:=s;
														
 
															+    if length(result)>0 then
														
 
															+      CharLowerBuff(LPWSTR(result),length(result));
														
 
															+  end;
														
 
															+
														
 
															+{******************************************************************************
														
 
															+                              Unicode
														
 
															+ ******************************************************************************}
														
 
															+
														
 
															+procedure Win32Unicode2AnsiMove(source:punicodechar;var dest:ansistring;len:SizeInt);
														
 
															+  var
														
 
															+    destlen: SizeInt;
														
 
															+  begin
														
 
															+    // retrieve length including trailing #0
														
 
															+    // not anymore, because this must also be usable for single characters
														
 
															+    destlen:=WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, source, len, nil, 0, nil, nil);
														
 
															+    // this will null-terminate
														
 
															+    setlength(dest, destlen);
														
 
															+    WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, source, len, @dest[1], destlen, nil, nil);
														
 
															+  end;
														
 
															+
														
 
															+procedure Win32Ansi2UnicodeMove(source:pchar;var dest:UnicodeString;len:SizeInt);
														
 
															+  var
														
 
															+    destlen: SizeInt;
														
 
															+  begin
														
 
															+    // retrieve length including trailing #0
														
 
															+    // not anymore, because this must also be usable for single characters
														
 
															+    destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, nil, 0);
														
 
															+    // this will null-terminate
														
 
															+    setlength(dest, destlen);
														
 
															+    MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, @dest[1], destlen);
														
 
															+  end;
														
 
															+
														
 
															+
														
 
															+function Win32UnicodeUpper(const s : UnicodeString) : UnicodeString;
														
 
															+  begin
														
 
															+    result:=s;
														
 
															+    UniqueString(result);
														
 
															+    if length(result)>0 then
														
 
															+      CharUpperBuff(LPWSTR(result),length(result));
														
 
															+  end;
														
 
															+
														
 
															+
														
 
															+function Win32UnicodeLower(const s : UnicodeString) : UnicodeString;
														
 
															   begin
														
 
															     result:=s;
														
 
															     UniqueString(result);
														
@@ -966,10 +1009,17 @@ function Win32WideLower(const s : WideString) : WideString;
 
															   are only relevant for the sysutils units }
														
 
															 procedure InitWin32Widestrings;
														
 
															   begin
														
 
															+    { Widestring }
														
 
															     widestringmanager.Wide2AnsiMoveProc:=@Win32Wide2AnsiMove;
														
 
															     widestringmanager.Ansi2WideMoveProc:=@Win32Ansi2WideMove;
														
 
															     widestringmanager.UpperWideStringProc:=@Win32WideUpper;
														
 
															     widestringmanager.LowerWideStringProc:=@Win32WideLower;
														
 
															+
														
 
															+    { Unicode }
														
 
															+    widestringmanager.Unicode2AnsiMoveProc:=@Win32Unicode2AnsiMove;
														
 
															+    widestringmanager.Ansi2UnicodeMoveProc:=@Win32Ansi2UnicodeMove;
														
 
															+    widestringmanager.UpperUnicodeStringProc:=@Win32UnicodeUpper;
														
 
															+    widestringmanager.LowerUnicodeStringProc:=@Win32UnicodeLower;
														
 
															   end;
														
@@ -1198,3 +1248,4 @@ begin
 
															   InitWin32Widestrings;
														
 
															   DispCallByIDProc:=@DoDispCallByIDError;
														
 
															 end.
														
 
															+
														
--- a/tests/test/tunistr5.pp
+++ b/tests/test/tunistr5.pp
@@ -20,18 +20,18 @@ begin
 
															      (ws[7]<>#$d87e) or
														
 
															      (ws[8]<>#$dc04) then
														
 
															     halt(1);
														
 
															-  us:=WideStringToUCS4String(ws);
														
 
															+  us:=UnicodeStringToUCS4String(ws);
														
 
															   if (length(us)<>8) or
														
 
															-     (us[0]<>UCS4Char(widechar('é'))) or
														
 
															-     (us[1]<>UCS4Char(widechar('ł'))) or
														
 
															-     (us[2]<>UCS4Char(widechar('Ł'))) or
														
 
															-     (us[3]<>UCS4Char(widechar('ć'))) or
														
 
															-     (us[4]<>UCS4Char(widechar('ç'))) or
														
 
															-     (us[5]<>UCS4Char(widechar('Ź'))) or
														
 
															+     (us[0]<>UCS4Char(unicodechar('é'))) or
														
 
															+     (us[1]<>UCS4Char(unicodechar('ł'))) or
														
 
															+     (us[2]<>UCS4Char(unicodechar('Ł'))) or
														
 
															+     (us[3]<>UCS4Char(unicodechar('ć'))) or
														
 
															+     (us[4]<>UCS4Char(unicodechar('ç'))) or
														
 
															+     (us[5]<>UCS4Char(unicodechar('Ź'))) or
														
 
															      (us[6]<>UCS4Char($2F804)) or
														
 
															      (us[7]<>UCS4Char(0)) then
														
 
															     halt(2);
														
 
															-  ws:=UCS4StringToWideString(us);
														
 
															+  ws:=UCS4StringToUnicodeString(us);
														
 
															   if (length(ws)<>8) or
														
 
															      (ws[1]<>'é') or
														
 
															      (ws[2]<>'ł') or
														
--- a/tests/test/tunistr6.pp
+++ b/tests/test/tunistr6.pp
@@ -0,0 +1,397 @@
 
															+{%skiptarget=wince}
														
 
															+{$codepage utf-8}
														
 
															+uses
														
 
															+  {$ifdef unix}
														
 
															+  cwstring,
														
 
															+  {$endif}
														
 
															+  sysutils;
														
 
															+
														
 
															+procedure doerror(i : integer);
														
 
															+  begin
														
 
															+    writeln('Error: ',i);
														
 
															+    halt(i);
														
 
															+  end;
														
 
															+
														
 
															+
														
 
															+{ normal upper case testing }
														
 
															+procedure testupper;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  w1:='aé'#0'èàł'#$d87e#$dc04;
														
 
															+  w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original upper: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+{$ifdef print}
														
 
															+  writeln('ansi: ',s);
														
 
															+{$endif print}
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiUpperCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeUpperCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodeupper: ',w1);
														
 
															+  writeln('original upper: ',w2);
														
 
															+  writeln('ansiupper: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(1);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(2);
														
 
															+
														
 
															+  w1:='aéèàł'#$d87e#$dc04;
														
 
															+  w2:='AÉÈÀŁ'#$d87e#$dc04;
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiStrUpper(pchar(s));
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeUpperCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodeupper: ',w1);
														
 
															+  writeln('ansistrupper: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(21);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(22);
														
 
															+
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+{ normal lower case testing }
														
 
															+procedure testlower;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
														
 
															+  w2:='aé'#0'èàł'#$d87e#$dc04;
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original lower: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiLowerCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeLowerCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodelower: ',w1);
														
 
															+  writeln('ansilower: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(3);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(4);
														
 
															+
														
 
															+
														
 
															+  w1:='AÉÈÀŁ'#$d87e#$dc04;
														
 
															+  w2:='aéèàł'#$d87e#$dc04;
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiStrLower(pchar(s));
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeLowerCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodelower: ',w1);
														
 
															+  writeln('ansistrlower: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(3);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(4);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+
														
 
															+{ upper case testing with a missing utf-16 pair at the end }
														
 
															+procedure testupperinvalid;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  { missing utf-16 pair at end }
														
 
															+  w1:='aé'#0'èàł'#$d87e;
														
 
															+  w2:='AÉ'#0'ÈÀŁ'#$d87e;
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original upper: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiUpperCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeUpperCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodeupper: ',w1);
														
 
															+  writeln('ansiupper: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(5);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(6);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+{ lower case testing with a missing utf-16 pair at the end }
														
 
															+procedure testlowerinvalid;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  { missing utf-16 pair at end}
														
 
															+  w1:='AÉ'#0'ÈÀŁ'#$d87e;
														
 
															+  w2:='aé'#0'èàł'#$d87e;
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original lower: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiLowerCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeLowerCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodelower: ',w1);
														
 
															+  writeln('ansilower: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(7);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(8);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+
														
 
															+{ upper case testing with a missing utf-16 pair at the end, followed by a normal char }
														
 
															+procedure testupperinvalid1;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  { missing utf-16 pair at end with char after it}
														
 
															+  w1:='aé'#0'èàł'#$d87e'j';
														
 
															+  w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original upper: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiUpperCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeUpperCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodeupper: ',w1);
														
 
															+  writeln('ansiupper: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(9);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(10);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+{ lower case testing with a missing utf-16 pair at the end, followed by a normal char }
														
 
															+procedure testlowerinvalid1;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  { missing utf-16 pair at end with char after it}
														
 
															+  w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
														
 
															+  w2:='aé'#0'èàł'#$d87e'j';
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original lower: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  w3:=s;
														
 
															+  w4:=AnsiLowerCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeLowerCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodelower: ',w1);
														
 
															+  writeln('ansilower: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(11);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(12);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+{ upper case testing with corrupting the utf-8 string after conversion }
														
 
															+procedure testupperinvalid2;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  w1:='aé'#0'èàł'#$d87e#$dc04'ö';
														
 
															+  w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original upper: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  { truncate the last utf-8 character }
														
 
															+  setlength(s,length(s)-1);
														
 
															+  w3:=s;
														
 
															+  { adjust checking values for new length due to corruption }
														
 
															+  if length(w3)<>length(w2) then
														
 
															+    begin
														
 
															+      setlength(w2,length(w3));
														
 
															+      setlength(w1,length(w3));
														
 
															+    end;
														
 
															+  w4:=AnsiUpperCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeUpperCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodeupper: ',w1);
														
 
															+  writeln('ansiupper: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(13);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(14);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+{ lower case testing with corrupting the utf-8 string after conversion }
														
 
															+procedure testlowerinvalid2;
														
 
															+var
														
 
															+  s: ansistring;
														
 
															+  w1,w2,w3,w4: unicodestring;
														
 
															+  i: longint;
														
 
															+begin
														
 
															+  w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
														
 
															+  w2:='aé'#0'èàł'#$d87e#$dc04'ö';
														
 
															+{$ifdef print}
														
 
															+// the utf-8 output can confuse the testsuite parser
														
 
															+  writeln('original: ',w1);
														
 
															+  writeln('original lower: ',w2);
														
 
															+{$endif print}
														
 
															+  s:=w1;
														
 
															+  { truncate the last utf-8 character }
														
 
															+  setlength(s,length(s)-1);
														
 
															+  w3:=s;
														
 
															+  { adjust checking values for new length due to corruption }
														
 
															+  if length(w3)<>length(w2) then
														
 
															+    begin
														
 
															+      setlength(w2,length(w3));
														
 
															+      setlength(w1,length(w3));
														
 
															+    end;
														
 
															+  w4:=AnsiLowerCase(s);
														
 
															+  { filter out unsupported characters }
														
 
															+  for i:=1 to length(w3) do
														
 
															+    if w3[i]='?' then
														
 
															+      begin
														
 
															+        w2[i]:='?';
														
 
															+        w1[i]:='?';
														
 
															+      end;
														
 
															+  w1:=UnicodeLowerCase(w1);
														
 
															+{$ifdef print}
														
 
															+  writeln('unicodelower: ',w1);
														
 
															+  writeln('ansilower: ',w4);
														
 
															+{$endif print}
														
 
															+  if (w1 <> w2) then
														
 
															+    doerror(15);
														
 
															+  if (w4 <> w2) then
														
 
															+    doerror(16);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+
														
 
															+begin
														
 
															+  testupper;
														
 
															+  writeln;
														
 
															+  testlower;
														
 
															+  writeln;
														
 
															+  writeln;
														
 
															+  testupperinvalid;
														
 
															+  writeln;
														
 
															+  testlowerinvalid;
														
 
															+  writeln;
														
 
															+  writeln;
														
 
															+  testupperinvalid1;
														
 
															+  writeln;
														
 
															+  testlowerinvalid1;
														
 
															+  writeln;
														
 
															+  writeln;
														
 
															+  testupperinvalid2;
														
 
															+  writeln;
														
 
															+  testlowerinvalid2;
														
 
															+  writeln('ok');
														
 
															+end.
														
--- a/tests/test/tunistr7.pp
+++ b/tests/test/tunistr7.pp
@@ -0,0 +1,47 @@
 
															+{$codepage utf-8}
														
 
															+
														
 
															+uses
														
 
															+{$ifdef unix}
														
 
															+  cwstring,
														
 
															+{$endif unix}
														
 
															+  sysutils;
														
 
															+
														
 
															+procedure testwcmp;
														
 
															+var
														
 
															+  w1,w2: unicodestring;
														
 
															+  s: ansistring;
														
 
															+begin
														
 
															+  w1:='aécde';
														
 
															+  { filter unsupported characters }
														
 
															+  s:=w1;
														
 
															+  w1:=s;
														
 
															+  w2:=w1;
														
 
															+
														
 
															+  if (w1<>w2) then
														
 
															+    halt(1);
														
 
															+  w1[2]:='f';
														
 
															+  if (w1=w2) or
														
 
															+     WideSameStr(w1,w2) or
														
 
															+     (WideCompareText(w1,w2)=0) or
														
 
															+     (WideCompareStr(w1,w2)<0) or
														
 
															+     (WideCompareStr(w2,w1)>0) then
														
 
															+    halt(2);
														
 
															+  w1[2]:=#0;
														
 
															+  w2[2]:=#0;
														
 
															+  if (w1<>w2) or
														
 
															+     not WideSameStr(w1,w2) or
														
 
															+     (WideCompareStr(w1,w2)<>0) or
														
 
															+     (WideCompareText(w1,w2)<>0) then
														
 
															+    halt(3);
														
 
															+  w1[3]:='m';
														
 
															+  if WideSameStr(w1,w2) or
														
 
															+     (WideCompareText(w1,w2)=0) or
														
 
															+     (WideCompareStr(w1,w2)<0) or
														
 
															+     (WideCompareStr(w2,w1)>0) then
														
 
															+    halt(4);
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+begin
														
 
															+  testwcmp;
														
 
															+end.