Browse Source

* fixed unix CompareWideString so that it no longer treats null characters
inside a widestring as end-of-string; added a test (which fails on
Darwin/FreeBSD in utf-8/utf-16 locales, because their libc's wcscoll is
documented as working only in single-byte locales and falling back to
wcscmp for all others)

git-svn-id: trunk@9416 -

Jonas Maebe 17 years ago
parent
commit
7ec9dc5bc9
2 changed files with 32 additions and 4 deletions
  1. 1 2
      rtl/inc/wustrings.inc
  2. 31 2
      rtl/unix/cwstring.pp

+ 1 - 2
rtl/inc/wustrings.inc

@@ -1815,7 +1815,7 @@ function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inli
 
 
 
 
 { converts an utf-16 code point or surrogate pair to utf-32 }
 { converts an utf-16 code point or surrogate pair to utf-32 }
-function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char;
+function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
 var
 var
   w: widechar;
   w: widechar;
 begin
 begin
@@ -1853,7 +1853,6 @@ function WideStringToUCS4String(const s : WideString) : UCS4String;
     i, slen,
     i, slen,
     destindex : SizeInt;
     destindex : SizeInt;
     len       : longint;
     len       : longint;
-    uch       : UCS4Char;
   begin
   begin
     slen:=length(s);
     slen:=length(s);
     setlength(result,slen+1);
     setlength(result,slen+1);

+ 31 - 2
rtl/unix/cwstring.pp

@@ -537,12 +537,41 @@ procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
   end;
   end;
 
 
 
 
+function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
+
+function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
+  { UTF-16 -> UCS4; embedded nulls become spaces, one terminating null is kept }
+  var
+    i, slen, destindex : SizeInt;
+    len : longint;
+    uch : UCS4Char;
+  begin
+    slen:=length(s);
+    setlength(result,slen+1);
+    i:=1;
+    destindex:=0;
+    while (i<=slen) do
+      begin
+        uch:=utf16toutf32(s,i,len);
+        if (uch=UCS4Char(0)) then
+          uch:=UCS4Char(32); { wcscoll would stop at an embedded null }
+        result[destindex]:=uch;
+        inc(destindex);
+        inc(i,len);
+      end;
+    result[destindex]:=UCS4Char(0);
+    { destindex <= slen; keep destindex+1 elements so the terminator survives }
+    setlength(result,destindex+1);
+  end;
+
+
 function CompareWideString(const s1, s2 : WideString) : PtrInt;
 function CompareWideString(const s1, s2 : WideString) : PtrInt;
   var
   var
     hs1,hs2 : UCS4String;
     hs1,hs2 : UCS4String;
   begin
   begin
-    hs1:=WideStringToUCS4String(s1);
-    hs2:=WideStringToUCS4String(s2);
+    { wcscoll interprets null chars as end-of-string -> filter out }
+    hs1:=WideStringToUCS4StringNoNulls(s1);
+    hs2:=WideStringToUCS4StringNoNulls(s2);
     result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
     result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
   end;
   end;