|
@@ -14,6 +14,7 @@
|
|
|
**********************************************************************}
|
|
|
|
|
|
{$mode objfpc}
|
|
|
+{$inline on}
|
|
|
|
|
|
unit cwstring;
|
|
|
|
|
@@ -45,11 +46,22 @@ Const
|
|
|
{$endif}
|
|
|
|
|
|
{ helper functions from libc }
|
|
|
+function tolower(__wc:cint):cint;cdecl;external libiconvname name 'tolower';
|
|
|
+function toupper(__wc:cint):cint;cdecl;external libiconvname name 'toupper';
|
|
|
function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
|
|
|
function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
|
|
|
+
|
|
|
function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
|
|
|
function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
|
|
|
function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
|
|
|
+{$ifndef beos}
|
|
|
+function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
|
|
|
+function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
|
|
|
+{$else beos}
|
|
|
+function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
|
|
|
+function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
|
|
|
+{$endif beos}
|
|
|
+
|
|
|
|
|
|
const
|
|
|
{$ifdef linux}
|
|
@@ -253,6 +265,92 @@ procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
|
|
|
end;
|
|
|
|
|
|
|
|
|
+(*
|
|
|
+function LowerWideString(const s : WideString) : WideString;
|
|
|
+ var
|
|
|
+ i, slen : SizeInt;
|
|
|
+{$if sizeof(wchar_t) = 4}
|
|
|
+ nc : wint_t;
|
|
|
+ resindex : SizeInt;
|
|
|
+ len : longint;
|
|
|
+ valid : boolean;
|
|
|
+{$elseif sizeof(wchar_t) = 2}
|
|
|
+ p : PWideChar;
|
|
|
+{$endif sizeof(wchar_t)}
|
|
|
+ begin
|
|
|
+ slen:=length(s);
|
|
|
+ SetLength(result,slen);
|
|
|
+{$if sizeof(wint_t) = 4}
|
|
|
+ i:=1;
|
|
|
+ resindex:=1;
|
|
|
+ while (i<=slen) do
|
|
|
+ begin
|
|
|
+ nc:=utf16toutf32(s,i,len,valid);
|
|
|
+ inc(i,len);
|
|
|
+ if (valid) then
|
|
|
+ ConcatUTF32ToWideStr(towlower(nc),result,resindex)
|
|
|
+ else
|
|
|
+ { do nothing }
|
|
|
+ pwidechar(@result[i])^:=s[i];
|
|
|
+ end;
|
|
|
+ { adjust length }
|
|
|
+ setlength(result,resindex-1);
|
|
|
+{$elseif sizeof(wchar_t) = 2}
|
|
|
+ { avoid unique calls for each character }
|
|
|
+ p:=@result[1];
|
|
|
+ { this will fail for surrogate pairs, but that's inherent }
|
|
|
+ { to choosing sizeof(wint_t)=2, and nothing we can help }
|
|
|
+ for i:=1 to length(s) do
|
|
|
+ p[i]:=WideChar(towlower(wint_t(s[i])));
|
|
|
+{$else}
|
|
|
+{$error Unsupported wchar_t size}
|
|
|
+{$endif}
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+function UpperWideString(const s : WideString) : WideString;
|
|
|
+ var
|
|
|
+ i, slen : SizeInt;
|
|
|
+{$if sizeof(wint_t) = 4}
|
|
|
+ nc : wint_t;
|
|
|
+ resindex : SizeInt;
|
|
|
+ len : longint;
|
|
|
+ valid : boolean;
|
|
|
+{$elseif sizeof(wchar_t) = 2}
|
|
|
+ p : PWideChar;
|
|
|
+{$endif sizeof(wchar_t)}
|
|
|
+ begin
|
|
|
+ slen:=length(s);
|
|
|
+ SetLength(result,slen);
|
|
|
+{$if sizeof(wchar_t) = 4}
|
|
|
+ i:=1;
|
|
|
+ resindex:=1;
|
|
|
+ while (i<=slen) do
|
|
|
+ begin
|
|
|
+ nc:=utf16toutf32(s,i,len,valid);
|
|
|
+ inc(i,len);
|
|
|
+ if (valid) then
|
|
|
+ ConcatUTF32ToWideStr (towupper(nc),result,resindex)
|
|
|
+ else
|
|
|
+ { do nothing }
|
|
|
+ pwidechar(@result[i])^:=s[i];
|
|
|
+ end;
|
|
|
+ { adjust length }
|
|
|
+ setlength(result,resindex-1);
|
|
|
+{$elseif sizeof(wchar_t) = 2}
|
|
|
+ { avoid unique calls for each character }
|
|
|
+ p:=@result[1];
|
|
|
+ { this will fail for surrogate pairs, but that's inherent }
|
|
|
+ { to choosing sizeof(wint_t)=2, and nothing we can help }
|
|
|
+ for i:=1 to length(s) do
|
|
|
+ p[i]:=WideChar(towupper(wint_t(s[i])));
|
|
|
+{$else}
|
|
|
+{$error Unsupported wchar_t size}
|
|
|
+{$endif}
|
|
|
+ end;
|
|
|
+*)
|
|
|
+
|
|
|
+
|
|
|
function LowerWideString(const s : WideString) : WideString;
|
|
|
var
|
|
|
i : SizeInt;
|
|
@@ -273,11 +371,193 @@ function UpperWideString(const s : WideString) : WideString;
|
|
|
end;
|
|
|
|
|
|
|
|
|
+procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
|
|
|
+begin
|
|
|
+ if (len>length(s)) then
|
|
|
+ if (length(s) < 10*256) then
|
|
|
+ setlength(s,length(s)+10)
|
|
|
+ else
|
|
|
+ setlength(s,length(s)+length(s) shr 8);
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
|
|
|
+begin
|
|
|
+ EnsureAnsiLen(s,index);
|
|
|
+ pchar(@s[index])^:=c;
|
|
|
+ inc(index);
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+{ concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
|
|
|
+{$ifndef beos}
|
|
|
+procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
|
|
|
+{$else not beos}
|
|
|
+procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
|
|
|
+{$endif beos}
|
|
|
+var
|
|
|
+ p : pchar;
|
|
|
+ mblen : size_t;
|
|
|
+begin
|
|
|
+ { we know that s is unique -> avoid uniquestring calls}
|
|
|
+ p:=@s[index];
|
|
|
+ if (nc<$7f) then
|
|
|
+ ConcatCharToAnsiStr(char(nc),s,index)
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
|
|
|
+ { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
|
|
|
+ { that's a libc macro mapped to internal functions/variables }
|
|
|
+ { and thus not a stable external API on systems where libc }
|
|
|
+ { breaks backwards compatibility every now and then }
|
|
|
+ EnsureAnsiLen(s,index+10);
|
|
|
+{$ifndef beos}
|
|
|
+ mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
|
|
|
+{$else not beos}
|
|
|
+ mblen:=wctomb(p,wchar_t(nc));
|
|
|
+{$endif not beos}
|
|
|
+ if (mblen<>size_t(-1)) then
|
|
|
+ inc(index,mblen)
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ { invalid wide char }
|
|
|
+ p^:='?';
|
|
|
+ inc(index);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+function LowerAnsiString(const s : AnsiString) : AnsiString;
|
|
|
+ var
|
|
|
+ i, slen,
|
|
|
+ resindex : SizeInt;
|
|
|
+ mblen : size_t;
|
|
|
+{$ifndef beos}
|
|
|
+ ombstate,
|
|
|
+ nmbstate : mbstate_t;
|
|
|
+{$endif beos}
|
|
|
+ wc : wchar_t;
|
|
|
+ begin
|
|
|
+ fillchar(ombstate,sizeof(ombstate),0);
|
|
|
+ fillchar(nmbstate,sizeof(nmbstate),0);
|
|
|
+ slen:=length(s);
|
|
|
+ SetLength(result,slen+10);
|
|
|
+ i:=1;
|
|
|
+ resindex:=1;
|
|
|
+ while (i<=slen) do
|
|
|
+ begin
|
|
|
+ if (s[i]<=#127) then
|
|
|
+ begin
|
|
|
+ ConcatCharToAnsiStr(char(tolower(cint(s[i]))),result,resindex);
|
|
|
+ inc(i)
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+{$ifndef beos}
|
|
|
+ mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
|
|
+{$else not beos}
|
|
|
+ mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
|
|
|
+{$endif not beos}
|
|
|
+ case mblen of
|
|
|
+ size_t(-2):
|
|
|
+ begin
|
|
|
+ { partial invalid character, copy literally }
|
|
|
+ while (i<=slen) do
|
|
|
+ begin
|
|
|
+ ConcatCharToAnsiStr(s[i],result,resindex);
|
|
|
+ inc(i);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ size_t(-1), 0:
|
|
|
+ begin
|
|
|
+ { invalid or null character }
|
|
|
+ ConcatCharToAnsiStr(s[i],result,resindex);
|
|
|
+ inc(i);
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ { a valid sequence }
|
|
|
+ { even if mblen = 1, the lowercase version may have a }
|
|
|
+ { different length }
|
|
|
+ { We can't do anything special if wchar_t is 16 bit... }
|
|
|
+{$ifndef beos}
|
|
|
+ ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
|
|
|
+{$else not beos}
|
|
|
+ ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
|
|
|
+{$endif not beos}
|
|
|
+ inc(i,mblen);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ SetLength(result,resindex-1);
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+function UpperAnsiString(const s : AnsiString) : AnsiString;
|
|
|
+ var
|
|
|
+ i, slen,
|
|
|
+ resindex : SizeInt;
|
|
|
+ mblen : size_t;
|
|
|
+ ombstate,
|
|
|
+ nmbstate : mbstate_t;
|
|
|
+ wc : wchar_t;
|
|
|
+ begin
|
|
|
+ fillchar(ombstate,sizeof(ombstate),0);
|
|
|
+ fillchar(nmbstate,sizeof(nmbstate),0);
|
|
|
+ slen:=length(s);
|
|
|
+ SetLength(result,slen+10);
|
|
|
+ i:=1;
|
|
|
+ resindex:=1;
|
|
|
+ while (i<=slen) do
|
|
|
+ begin
|
|
|
+ if (s[i]<=#127) then
|
|
|
+ begin
|
|
|
+ ConcatCharToAnsiStr(char(toupper(cint(s[i]))),result,resindex);
|
|
|
+ inc(i)
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
|
|
+ case mblen of
|
|
|
+ size_t(-2):
|
|
|
+ begin
|
|
|
+ { partial invalid character, copy literally }
|
|
|
+ while (i<=slen) do
|
|
|
+ begin
|
|
|
+ ConcatCharToAnsiStr(s[i],result,resindex);
|
|
|
+ inc(i);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ size_t(-1), 0:
|
|
|
+ begin
|
|
|
+ { invalid or null character }
|
|
|
+ ConcatCharToAnsiStr(s[i],result,resindex);
|
|
|
+ inc(i);
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ { a valid sequence }
|
|
|
+ { even if mblen = 1, the uppercase version may have a }
|
|
|
+ { different length }
|
|
|
+ { We can't do anything special if wchar_t is 16 bit... }
|
|
|
+ ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
|
|
|
+ inc(i,mblen);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ SetLength(result,resindex-1);
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
|
|
var
|
|
|
outlength,
|
|
|
outoffset,
|
|
|
outleft : size_t;
|
|
|
+ err: cint;
|
|
|
srcpos,
|
|
|
destpos: pchar;
|
|
|
mynil : pchar;
|
|
@@ -294,7 +574,22 @@ procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
|
|
outleft:=outlength*4;
|
|
|
while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
|
|
|
begin
|
|
|
- case fpgetCerrno of
|
|
|
+ err:=fpgetCerrno;
|
|
|
+ case err of
|
|
|
+ ESysEINVAL,
|
|
|
+ ESysEILSEQ:
|
|
|
+ begin
|
|
|
+ { skip and set to '?' }
|
|
|
+ inc(srcpos);
|
|
|
+ dec(len);
|
|
|
+ plongint(destpos)^:=longint('?');
|
|
|
+ inc(destpos,4);
|
|
|
+ dec(outleft,4);
|
|
|
+ { reset }
|
|
|
+ iconv(iconv_ansi2ucs4,@mynil,@my0,@mynil,@my0);
|
|
|
+ if err=ESysEINVAL then
|
|
|
+ break;
|
|
|
+ end;
|
|
|
ESysE2BIG:
|
|
|
begin
|
|
|
outoffset:=destpos-pchar(dest);
|
|
@@ -375,9 +670,10 @@ begin
|
|
|
CompareTextWideStringProc:=@CompareTextWideString;
|
|
|
{
|
|
|
CharLengthPCharProc
|
|
|
-
|
|
|
- UpperAnsiStringProc
|
|
|
- LowerAnsiStringProc
|
|
|
+ }
|
|
|
+ UpperAnsiStringProc:=@UpperAnsiString;
|
|
|
+ LowerAnsiStringProc:=@LowerAnsiString;
|
|
|
+ {
|
|
|
CompareStrAnsiStringProc
|
|
|
CompareTextAnsiStringProc
|
|
|
}
|