12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 2005 by Florian Klaempfl,
- member of the Free Pascal development team.
- libc based wide string support
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$mode objfpc}
- {$inline on}
- {$implicitexceptions off}
- unit cwstring;
- interface
- procedure SetCWidestringManager;
- implementation
- {$linklib c}
- // Linux (and maybe glibc platforms in general), have iconv in glibc.
- {$if defined(linux) or defined(solaris)}
- {$define iconv_is_in_libc}
- {$endif}
- {$ifdef netbsd}
- {$ifndef DISABLE_ICONV_LIBC}
- {$define iconv_is_in_libc}
- {$endif}
- {$endif}
- {$ifndef iconv_is_in_libc}
- {$if defined(haiku)}
- {$linklib textencoding}
- {$else}
- {$linklib iconv}
- {$endif}
- {$define useiconv}
- {$endif not iconv_is_in_libc}
- {$i rtldefs.inc}
- Uses
- BaseUnix,
- ctypes,
- unix,
- unixtype,
- initc,
- dynlibs,
- unixcp;
- Const
- {$ifndef useiconv}
- libiconvname='c'; // is in libc under Linux.
- libprefix='lib';
- {$else}
- {$ifdef haiku}
- libiconvname='textencoding'; // is in libtextencoding under Haiku
- libprefix='lib';
- {$else}
- {$if defined(darwin) or defined(aix)}
- libiconvname='libiconv';
- libprefix='';
- {$else}
- libiconvname='iconv';
- libprefix='lib';
- {$endif}
- {$endif}
- {$endif}
- { helper functions from libc }
- function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
- function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
- function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
- function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
- {$ifdef netbsd}
- { NetBSD has a new setlocale function defined in /usr/include/locale.h
- that should be used }
- function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name '__setlocale50';
- {$else}
- function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
- {$endif}
- {$if not(defined(beos) and not defined(haiku))}
- function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
- function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
- function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
- {$else beos}
- function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
- function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
- function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
- {$endif beos}
- const
- {$if defined(linux)}
- __LC_CTYPE = 0;
- LC_ALL = 6;
- _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
- _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
- CODESET = _NL_CTYPE_CODESET_NAME;
- {$elseif defined(darwin)}
- CODESET = 0;
- LC_ALL = 0;
- {$elseif defined(FreeBSD)} // actually FreeBSD5. internationalisation is afaik not default on 4.
- __LC_CTYPE = 0;
- LC_ALL = 0;
- _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
- _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
- CODESET = 0; // _NL_CTYPE_CODESET_NAME;
- {$elseif defined(solaris)}
- {$define ACCEPT_646}
- CODESET=49;
- LC_ALL = 6;
- {$elseif defined(beos)}
- {$ifdef haiku}
- CODESET= 0; // Checked for Haiku
- LC_ALL = 0; // Checked for Haiku
- {$else}
- {$warning check correct value for BeOS}
- CODESET=49;
- LC_ALL = 6; // Checked for BeOS
- {$endif}
- ESysEILSEQ = EILSEQ;
- {$elseif defined(OpenBSD)}
- {$define ACCEPT_646}
- CODESET = 51;
- LC_ALL = 0;
- {$elseif defined(NetBSD)}
- {$define ACCEPT_646}
- CODESET = 51;
- LC_ALL = 0;
- {$elseif defined(aix)}
- CODESET = 49;
- LC_ALL = -1;
- {$elseif defined(dragonfly)}
- CODESET = 0;
- LC_ALL = 0;
- __LC_CTYPE = 0;
- _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
- _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
- {$else not aix}
- {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
- // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
- {$endif}
- { unicode encoding name }
- {$ifdef FPC_LITTLE_ENDIAN}
- unicode_encoding2 = 'UTF-16LE';
- unicode_encoding4 = 'UCS-4LE';
- {$else FPC_LITTLE_ENDIAN}
- {$ifdef AIX}
- unicode_encoding2 = 'UTF-16';
- {$else AIX}
- unicode_encoding2 = 'UTF-16BE';
- unicode_encoding4 = 'UCS-4BE';
- {$endif AIX}
- {$endif FPC_LITTLE_ENDIAN}
- { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
- { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
- { that's a libc macro mapped to internal functions/variables }
- { and thus not a stable external API on systems where libc }
- { breaks backwards compatibility every now and then }
- MB_CUR_MAX = 10;
- { Requests for iconvctl }
- ICONV_TRIVIALP = 0; // int *argument
- ICONV_GET_TRANSLITERATE = 1; // int *argument
- ICONV_SET_TRANSLITERATE = 2; // const int *argument
- ICONV_GET_DISCARD_ILSEQ = 3; // int *argument
- ICONV_SET_DISCARD_ILSEQ = 4; // const int *argument
- ICONV_SET_HOOKS = 5; // const struct iconv_hooks *argument
- ICONV_SET_FALLBACKS = 6; // const struct iconv_fallbacks *argument
- type
- piconv_t = ^iconv_t;
- iconv_t = pointer;
- nl_item = cint;
- {$ifdef haiku}
- function nl_langinfo(__item:nl_item):pchar;cdecl;external 'root' name 'nl_langinfo';
- {$else}
- {$ifndef beos}
- function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
- {$endif}
- {$endif}
- {$if (not defined(bsd) and not defined(beos)) or defined(iconv_is_in_libc) or (defined(darwin) and not defined(cpupowerpc32))}
- function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
- function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
- function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
- const
- iconvctlname='iconvctl';
- {$else}
- function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
- function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
- function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
- const
- iconvctlname='libiconvctl';
- {$endif}
- var
- iconvctl:function(__cd:iconv_t; __request:cint; __argument:pointer):cint;cdecl;
- procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
- threadvar
- iconv_ansi2wide,
- iconv_wide2ansi : iconv_t;
- { since we cache the iconv_t converters, we have to do the same
- for the DefaultSystemCodePage variable since if it changes, we
- have to re-initialize the converters too. We can't do that via
- a callback in the widestring manager because DefaultSystemCodePage
- is not a threadvar and we can't automatically change this in all
- threads }
- current_DefaultSystemCodePage: TSystemCodePage;
- procedure InitThread;
- var
- transliterate: cint;
- {$if not(defined(darwin) and (defined(cpuarm) or defined(cpuaarch64))) and not defined(iphonesim)}
- iconvindex: longint;
- {$endif}
- iconvname, toencoding: rawbytestring;
- begin
- current_DefaultSystemCodePage:=DefaultSystemCodePage;
- {$if declared(iconvindex)}
- iconvindex:=GetCodepageData(DefaultSystemCodePage);
- if iconvindex<>-1 then
- iconvname:=UnixCpMap[iconvindex].name
- else
- { default to UTF-8 on Unix platforms }
- iconvname:='UTF-8';
- {$else}
- { Unix locale settings are ignored on iPhoneOS/iPhoneSimulator }
- iconvname:='UTF-8';
- {$endif}
- toencoding:=iconvname;
- if not assigned(iconvctl) then
- toencoding:=toencoding+'//TRANSLIT';
- iconv_wide2ansi:=iconv_open(pchar(toencoding),unicode_encoding2);
- iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
- if assigned(iconvctl) and
- (iconv_wide2ansi<>iconv_t(-1)) then
- begin
- transliterate:=1;
- iconvctl(iconv_wide2ansi,ICONV_SET_TRANSLITERATE,@transliterate);
- end;
- end;
- procedure FiniThread;
- begin
- if (iconv_wide2ansi <> iconv_t(-1)) then
- iconv_close(iconv_wide2ansi);
- if (iconv_ansi2wide <> iconv_t(-1)) then
- iconv_close(iconv_ansi2wide);
- end;
- {$if defined(beos) and not defined(haiku)}
- function nl_langinfo(__item:nl_item):pchar;
- begin
- {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
- // Now implement the minimum required to correctly initialize WideString support
- case __item of
- CODESET : Result := 'UTF-8'; // BeOS use UTF-8
- else
- begin
- Assert(False, 'nl_langinfo was called with an unknown nl_item value');
- Result := '';
- end;
- end;
- end;
- {$endif}
- function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: pchar; cp_is_from: boolean): iconv_t;
- var
- iconvindex: longint;
- toencoding: rawbytestring;
- transliterate: cint;
- begin
- { TODO: add caching (then we also don't need separate code for
- the default system page and other ones)
- -- typecasting an ansistring function result to pchar is
- unsafe normally, but these are constant strings -> no
- problem }
- open_iconv_for_cps:=iconv_t(-1);
- iconvindex:=GetCodepageData(cp);
- if iconvindex=-1 then
- exit;
- repeat
- if cp_is_from then
- open_iconv_for_cps:=iconv_open(otherencoding,pchar(UnixCpMap[iconvindex].name))
- else
- begin
- toencoding:=UnixCpMap[iconvindex].name;
- if not assigned(iconvctl) then
- toencoding:=toencoding+'//TRANSLIT';
- open_iconv_for_cps:=iconv_open(pchar(toencoding),otherencoding);
- end;
- inc(iconvindex);
- until (open_iconv_for_cps<>iconv_t(-1)) or
- (iconvindex>high(UnixCpMap)) or
- (UnixCpMap[iconvindex].cp<>cp);
- if not cp_is_from and
- (open_iconv_for_cps<>iconv_t(-1)) and
- assigned(iconvctl) then
- begin
- transliterate:=1;
- iconvctl(open_iconv_for_cps,ICONV_SET_TRANSLITERATE,@transliterate);
- end;
- end;
- {$ifdef aix}
- {$i cwstraix.inc}
- {$endif aix}
- procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
- var
- outlength,
- outoffset,
- srclen,
- outleft : size_t;
- use_iconv: iconv_t;
- srcpos : pwidechar;
- destpos: pchar;
- mynil : pchar;
- my0 : size_t;
- err : longint;
- transliterate: cint;
- free_iconv: boolean;
- {$ifdef aix}
- intermediate: rawbytestring;
- {$endif aix}
- begin
- {$ifdef aix}
- { AIX libiconv does not support converting cp866 to anything else except
- for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
- if cp=866 then
- begin
- Wide2AnsiMove(source,intermediate,28595,len);
- if handle_aix_intermediate(pchar(intermediate),28595,cp,dest,len) then
- exit;
- end;
- {$endif aix}
- if (cp=DefaultSystemCodePage) then
- begin
- { update iconv converter in case the DefaultSystemCodePage has been
- changed }
- if current_DefaultSystemCodePage<>DefaultSystemCodePage then
- begin
- FiniThread;
- InitThread;
- end;
- use_iconv:=iconv_wide2ansi;
- free_iconv:=false;
- end
- else
- begin
- use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);
- if (use_iconv<>iconv_t(-1)) and
- assigned(iconvctl) then
- begin
- transliterate:=1;
- iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
- end;
- free_iconv:=true;
- end;
- { unsupported encoding -> default move }
- if use_iconv=iconv_t(-1) then
- begin
- DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
- exit;
- end;
- mynil:=nil;
- my0:=0;
- { rought estimation }
- setlength(dest,len*3);
- outlength:=len*3;
- srclen:=len*2;
- srcpos:=source;
- destpos:=pchar(dest);
- outleft:=outlength;
- while iconv(use_iconv,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
- begin
- err:=fpgetCerrno;
- case err of
- { last character is incomplete sequence }
- ESysEINVAL,
- { incomplete sequence in the middle }
- ESysEILSEQ:
- begin
- { skip and set to '?' }
- inc(srcpos);
- dec(srclen,2);
- destpos^:='?';
- inc(destpos);
- dec(outleft);
- { reset }
- iconv(use_iconv,@mynil,@my0,@mynil,@my0);
- if err=ESysEINVAL then
- break;
- end;
- ESysE2BIG:
- begin
- outoffset:=destpos-pchar(dest);
- { extend }
- setlength(dest,outlength+len*3);
- inc(outleft,len*3);
- inc(outlength,len*3);
- { string could have been moved }
- destpos:=pchar(dest)+outoffset;
- end;
- else
- runerror(231);
- end;
- end;
- // truncate string
- setlength(dest,length(dest)-outleft);
- SetCodePage(dest,cp,false);
- if free_iconv then
- iconv_close(use_iconv);
- end;
- procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
- var
- outlength,
- outoffset,
- outleft : size_t;
- use_iconv: iconv_t;
- srcpos,
- destpos: pchar;
- mynil : pchar;
- my0 : size_t;
- err: cint;
- iconvindex: longint;
- free_iconv: boolean;
- {$ifdef aix}
- intermediate: rawbytestring;
- {$endif aix}
- begin
- {$ifdef aix}
- { AIX libiconv does not support converting cp866 to anything else except
- for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
- if cp=866 then
- begin
- if handle_aix_intermediate(source,cp,cp,intermediate,len) then
- source:=pchar(intermediate);
- end;
- {$endif aix}
- if (cp=DefaultSystemCodePage) then
- begin
- { update iconv converter in case the DefaultSystemCodePage has been
- changed }
- if current_DefaultSystemCodePage<>DefaultSystemCodePage then
- begin
- FiniThread;
- InitThread;
- end;
- use_iconv:=iconv_ansi2wide;
- free_iconv:=false;
- end
- else
- begin
- { TODO: add caching (then we also don't need separate code for
- the default system page and other ones)
- -- typecasting an ansistring function result to pchar is
- unsafe normally, but these are constant strings -> no
- problem }
- use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);
- free_iconv:=true;
- end;
- { unsupported encoding -> default move }
- if use_iconv=iconv_t(-1) then
- begin
- DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
- exit;
- end;
- mynil:=nil;
- my0:=0;
- // extra space
- outlength:=len+1;
- setlength(dest,outlength);
- srcpos:=source;
- destpos:=pchar(dest);
- outleft:=outlength*2;
- while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
- begin
- err:=fpgetCerrno;
- case err of
- ESysEINVAL,
- ESysEILSEQ:
- begin
- { skip and set to '?' }
- inc(srcpos);
- dec(len);
- pwidechar(destpos)^:='?';
- inc(destpos,2);
- dec(outleft,2);
- { reset }
- iconv(use_iconv,@mynil,@my0,@mynil,@my0);
- if err=ESysEINVAL then
- break;
- end;
- ESysE2BIG:
- begin
- outoffset:=destpos-pchar(dest);
- { extend }
- setlength(dest,outlength+len);
- inc(outleft,len*2);
- inc(outlength,len);
- { string could have been moved }
- destpos:=pchar(dest)+outoffset;
- end;
- else
- runerror(231);
- end;
- end;
- // truncate string
- setlength(dest,length(dest)-outleft div 2);
- if free_iconv then
- iconv_close(use_iconv);
- end;
- function LowerWideString(const s : WideString) : WideString;
- var
- i : SizeInt;
- begin
- SetLength(result,length(s));
- for i:=0 to length(s)-1 do
- pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
- end;
- function UpperWideString(const s : WideString) : WideString;
- var
- i : SizeInt;
- begin
- SetLength(result,length(s));
- for i:=0 to length(s)-1 do
- pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
- end;
- procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
- begin
- if (len>length(s)) then
- if (length(s) < 10*256) then
- setlength(s,length(s)+10)
- else
- setlength(s,length(s)+length(s) shr 8);
- end;
- procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
- begin
- EnsureAnsiLen(s,index);
- pchar(@s[index])^:=c;
- inc(index);
- end;
- { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
- {$if not(defined(beos) and not defined(haiku))}
- procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
- {$else not beos}
- procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
- {$endif beos}
- var
- p : pchar;
- mblen : size_t;
- begin
- { we know that s is unique -> avoid uniquestring calls}
- p:=@s[index];
- if (nc<=127) then
- ConcatCharToAnsiStr(char(nc),s,index)
- else
- begin
- EnsureAnsiLen(s,index+MB_CUR_MAX);
- {$if not(defined(beos) and not defined(haiku))}
- mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
- {$else not beos}
- mblen:=wctomb(p,wchar_t(nc));
- {$endif not beos}
- if (mblen<>size_t(-1)) then
- inc(index,mblen)
- else
- begin
- { invalid wide char }
- p^:='?';
- inc(index);
- end;
- end;
- end;
- function LowerAnsiString(const s : AnsiString) : AnsiString;
- var
- i, slen,
- resindex : SizeInt;
- mblen : size_t;
- {$if not(defined(beos) and not defined(haiku))}
- ombstate,
- nmbstate : mbstate_t;
- {$endif beos}
- wc : wchar_t;
- begin
- {$if not(defined(beos) and not defined(haiku))}
- fillchar(ombstate,sizeof(ombstate),0);
- fillchar(nmbstate,sizeof(nmbstate),0);
- {$endif beos}
- slen:=length(s);
- SetLength(result,slen+10);
- i:=1;
- resindex:=1;
- while (i<=slen) do
- begin
- if (s[i]<=#127) then
- begin
- wc:=wchar_t(s[i]);
- mblen:= 1;
- end
- else
- {$if not(defined(beos) and not defined(haiku))}
- mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
- {$else not beos}
- mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
- {$endif not beos}
- case mblen of
- size_t(-2):
- begin
- { partial invalid character, copy literally }
- while (i<=slen) do
- begin
- ConcatCharToAnsiStr(s[i],result,resindex);
- inc(i);
- end;
- end;
- size_t(-1), 0:
- begin
- { invalid or null character }
- ConcatCharToAnsiStr(s[i],result,resindex);
- inc(i);
- end;
- else
- begin
- { a valid sequence }
- { even if mblen = 1, the lowercase version may have a }
- { different length }
- { We can't do anything special if wchar_t is 16 bit... }
- {$if not(defined(beos) and not defined(haiku))}
- ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
- {$else not beos}
- ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
- {$endif not beos}
- inc(i,mblen);
- end;
- end;
- end;
- SetLength(result,resindex-1);
- end;
- function UpperAnsiString(const s : AnsiString) : AnsiString;
- var
- i, slen,
- resindex : SizeInt;
- mblen : size_t;
- {$if not(defined(beos) and not defined(haiku))}
- ombstate,
- nmbstate : mbstate_t;
- {$endif beos}
- wc : wchar_t;
- begin
- {$if not(defined(beos) and not defined(haiku))}
- fillchar(ombstate,sizeof(ombstate),0);
- fillchar(nmbstate,sizeof(nmbstate),0);
- {$endif beos}
- slen:=length(s);
- SetLength(result,slen+10);
- i:=1;
- resindex:=1;
- while (i<=slen) do
- begin
- if (s[i]<=#127) then
- begin
- wc:=wchar_t(s[i]);
- mblen:= 1;
- end
- else
- {$if not(defined(beos) and not defined(haiku))}
- mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
- {$else not beos}
- mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
- {$endif beos}
- case mblen of
- size_t(-2):
- begin
- { partial invalid character, copy literally }
- while (i<=slen) do
- begin
- ConcatCharToAnsiStr(s[i],result,resindex);
- inc(i);
- end;
- end;
- size_t(-1), 0:
- begin
- { invalid or null character }
- ConcatCharToAnsiStr(s[i],result,resindex);
- inc(i);
- end;
- else
- begin
- { a valid sequence }
- { even if mblen = 1, the uppercase version may have a }
- { different length }
- { We can't do anything special if wchar_t is 16 bit... }
- {$if not(defined(beos) and not defined(haiku))}
- ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
- {$else not beos}
- ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
- {$endif not beos}
- inc(i,mblen);
- end;
- end;
- end;
- SetLength(result,resindex-1);
- end;
- function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
- var
- i, slen,
- destindex : SizeInt;
- uch : UCS4Char;
- begin
- slen:=length(s);
- setlength(result,slen+1);
- i:=1;
- destindex:=0;
- while (i<=slen) do
- begin
- uch:=UCS4Char(s[i]);
- if (uch=0) then
- result[destindex]:=32
- else if (uch<=$d7ff) or (uch>=$e000) then
- result[destindex]:=uch
- else if (uch<=$dbff) and
- (i<slen) and
- (s[i+1]>=#$dc00) and
- (s[i+1]<=#$dfff) then
- begin
- result[destindex]:=(UCS4Char(uch-$d7c0) shl 10)+(UCS4Char(s[i+1]) xor $dc00);
- inc(i);
- end
- else { invalid surrogate pair }
- result[destindex]:=uch;
- inc(i);
- inc(destindex);
- end;
- result[destindex]:=UCS4Char(0);
- { Trimming length in this particular case is just a waste of time,
- because result will be interpreted as null-terminated and discarded
- almost immediately }
- end;
- function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
- {$if not(defined (aix) and defined(cpupowerpc32))}
- var
- hs1,hs2 : UCS4String;
- us1,us2 : WideString;
- begin
- { wcscoll interprets null chars as end-of-string -> filter out }
- if coIgnoreCase in Options then
- begin
- us1:=UpperWideString(s1);
- us2:=UpperWideString(s2);
- end
- else
- begin
- us1:=s1;
- us2:=s2;
- end;
- hs1:=WideStringToUCS4StringNoNulls(us1);
- hs2:=WideStringToUCS4StringNoNulls(us2);
- result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
- end;
- {$else}
- { AIX/PPC32 has a 16 bit wchar_t }
- var
- i, len: longint;
- us1,us2 : WideString;
- hs1, hs2: array of widechar;
- begin
- if coIgnoreCase in Options then
- begin
- us1:=UpperWideString(s1);
- us2:=UpperWideString(s2);
- end
- else
- begin
- us1:=s1;
- us2:=s2;
- end;
- len:=length(us1);
- setlength(hs1,len+1);
- for i:=1 to len do
- if us1[i]<>#0 then
- hs1[i-1]:=us1[i]
- else
- hs1[i-1]:=#32;
- hs1[len]:=#0;
- len:=length(us2);
- setlength(hs2,len+1);
- for i:=1 to len do
- if us2[i]<>#0 then
- hs2[i-1]:=us2[i]
- else
- hs2[i-1]:=#32;
- hs2[len]:=#0;
- result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
- end;
- {$endif}
- { return value: number of code points in the string. Whenever an invalid
- code point is encountered, all characters part of this invalid code point
- are considered to form one "character" and the next character is
- considered to be the start of a new (possibly also invalid) code point }
- function CharLengthPChar(const Str: PChar): PtrInt;
- var
- nextlen: ptrint;
- s: pchar;
- {$if not(defined(beos) and not defined(haiku))}
- mbstate: mbstate_t;
- {$endif not beos}
- begin
- result:=0;
- s:=str;
- {$if not(defined(beos) and not defined(haiku))}
- fillchar(mbstate,sizeof(mbstate),0);
- {$endif not beos}
- repeat
- {$if defined(beos) and not defined(haiku)}
- nextlen:=ptrint(mblen(s,MB_CUR_MAX));
- {$else beos}
- nextlen:=ptrint(mbrlen(s,MB_CUR_MAX,@mbstate));
- {$endif beos}
- { skip invalid/incomplete sequences }
- if (nextlen<0) then
- nextlen:=1;
- inc(result,1);
- inc(s,nextlen);
- until (nextlen=0);
- end;
- function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
- {$if not(defined(beos) and not defined(haiku))}
- var
- mbstate: mbstate_t;
- {$endif not beos}
- begin
- {$if defined(beos) and not defined(haiku)}
- result:=ptrint(mblen(str,maxlookahead));
- {$else beos}
- fillchar(mbstate,sizeof(mbstate),0);
- result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
- { mbrlen can also return -2 for "incomplete but potially valid character
- and data has been processed" }
- if result<0 then
- result:=-1;
- {$endif beos}
- end;
- function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
- var
- a,b: pchar;
- i: PtrInt;
- begin
- if not(canmodifys1) then
- getmem(a,len1+1)
- else
- a:=s1;
- for i:=0 to len1-1 do
- if s1[i]<>#0 then
- a[i]:=s1[i]
- else
- a[i]:=#32;
- a[len1]:=#0;
- if not(canmodifys2) then
- getmem(b,len2+1)
- else
- b:=s2;
- for i:=0 to len2-1 do
- if s2[i]<>#0 then
- b[i]:=s2[i]
- else
- b[i]:=#32;
- b[len2]:=#0;
- result:=strcoll(a,b);
- if not(canmodifys1) then
- freemem(a);
- if not(canmodifys2) then
- freemem(b);
- end;
- function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
- begin
- result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
- end;
- function StrCompAnsi(s1,s2 : PChar): PtrInt;
- begin
- result:=strcoll(s1,s2);
- end;
- function AnsiCompareText(const S1, S2: ansistring): PtrInt;
- var
- a, b: AnsiString;
- begin
- a:=UpperAnsistring(s1);
- b:=UpperAnsistring(s2);
- result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
- end;
- function AnsiStrIComp(S1, S2: PChar): PtrInt;
- begin
- result:=AnsiCompareText(ansistring(s1),ansistring(s2));
- end;
- function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
- var
- a, b: pchar;
- begin
- if (maxlen=0) then
- exit(0);
- if (s1[maxlen]<>#0) then
- begin
- getmem(a,maxlen+1);
- move(s1^,a^,maxlen);
- a[maxlen]:=#0;
- end
- else
- a:=s1;
- if (s2[maxlen]<>#0) then
- begin
- getmem(b,maxlen+1);
- move(s2^,b^,maxlen);
- b[maxlen]:=#0;
- end
- else
- b:=s2;
- result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
- if (a<>s1) then
- freemem(a);
- if (b<>s2) then
- freemem(b);
- end;
- function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
- var
- a, b: ansistring;
- begin
- if (maxlen=0) then
- exit(0);
- setlength(a,maxlen);
- move(s1^,a[1],maxlen);
- setlength(b,maxlen);
- move(s2^,b[1],maxlen);
- result:=AnsiCompareText(a,b);
- end;
- procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
- var
- newlen: sizeint;
- begin
- newlen:=length(s);
- if newlen>strlen(orgp) then
- fpc_rangeerror;
- p:=orgp;
- if (newlen>0) then
- move(s[1],p[0],newlen);
- p[newlen]:=#0;
- end;
- function AnsiStrLower(Str: PChar): PChar;
- var
- temp: ansistring;
- begin
- temp:=loweransistring(str);
- ansi2pchar(temp,str,result);
- end;
- function AnsiStrUpper(Str: PChar): PChar;
- var
- temp: ansistring;
- begin
- temp:=upperansistring(str);
- ansi2pchar(temp,str,result);
- end;
- function envvarset(const varname: pchar): boolean;
- var
- varval: pchar;
- begin
- varval:=fpgetenv(varname);
- result:=
- assigned(varval) and
- (varval[0]<>#0);
- end;
- function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
- var
- langinfo: pchar;
- begin
- {$ifdef FPCRTL_FILESYSTEM_UTF8}
- if stdcp=scpFileSystemSingleByte then
- begin
- result:=CP_UTF8;
- exit;
- end;
- {$endif}
- { if none of the relevant LC_* environment variables are set, fall back to
- UTF-8 (this happens under some versions of OS X for GUI applications, which
- otherwise get CP_ASCII) }
- if envvarset('LC_ALL') or
- envvarset('LC_CTYPE') or
- envvarset('LANG') then
- begin
- langinfo:=nl_langinfo(CODESET);
- { there's a bug in the Mac OS X 10.5 libc (based on FreeBSD's)
- that causes it to return an empty string of UTF-8 locales
- -> patch up (and in general, UTF-8 is a good default on
- Unix platforms) }
- if not assigned(langinfo) or
- (langinfo^=#0) then
- langinfo:='UTF-8';
- Result:=GetCodepageByName(ansistring(langinfo));
- end
- else
- Result:=unixcp.GetSystemCodepage;
- end;
- {$ifdef FPC_HAS_CPSTRING}
- procedure SetStdIOCodePage(var T: Text); inline;
- begin
- case TextRec(T).Mode of
- fmInput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleInput);
- fmOutput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleOutput);
- end;
- end;
- procedure SetStdIOCodePages; inline;
- begin
- SetStdIOCodePage(Input);
- SetStdIOCodePage(Output);
- SetStdIOCodePage(ErrOutput);
- SetStdIOCodePage(StdOut);
- SetStdIOCodePage(StdErr);
- end;
- {$endif FPC_HAS_CPSTRING}
- var
- OrgWideStringManager: TUnicodeStringManager;
- Procedure SetCWideStringManager;
- Var
- CWideStringManager : TUnicodeStringManager;
- begin
- GetUnicodeStringManager(OrgWideStringManager);
- CWideStringManager:=OrgWideStringManager;
- With CWideStringManager do
- begin
- Wide2AnsiMoveProc:=@Wide2AnsiMove;
- Ansi2WideMoveProc:=@Ansi2WideMove;
- UpperWideStringProc:=@UpperWideString;
- LowerWideStringProc:=@LowerWideString;
- CompareWideStringProc:=@CompareWideString;
- // CompareTextWideStringProc:=@CompareTextWideString;
- CharLengthPCharProc:=@CharLengthPChar;
- CodePointLengthProc:=@CodePointLength;
- UpperAnsiStringProc:=@UpperAnsiString;
- LowerAnsiStringProc:=@LowerAnsiString;
- CompareStrAnsiStringProc:=@CompareStrAnsiString;
- CompareTextAnsiStringProc:=@AnsiCompareText;
- StrCompAnsiStringProc:=@StrCompAnsi;
- StrICompAnsiStringProc:=@AnsiStrIComp;
- StrLCompAnsiStringProc:=@AnsiStrLComp;
- StrLICompAnsiStringProc:=@AnsiStrLIComp;
- StrLowerAnsiStringProc:=@AnsiStrLower;
- StrUpperAnsiStringProc:=@AnsiStrUpper;
- ThreadInitProc:=@InitThread;
- ThreadFiniProc:=@FiniThread;
- { Unicode }
- Unicode2AnsiMoveProc:=@Wide2AnsiMove;
- Ansi2UnicodeMoveProc:=@Ansi2WideMove;
- UpperUnicodeStringProc:=@UpperWideString;
- LowerUnicodeStringProc:=@LowerWideString;
- CompareUnicodeStringProc:=@CompareWideString;
- { CodePage }
- GetStandardCodePageProc:=@GetStandardCodePage;
- end;
- SetUnicodeStringManager(CWideStringManager);
- end;
- var
- iconvlib:TLibHandle;
- initialization
- SetCWideStringManager;
- { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
- { with the information from the environment variables according to POSIX }
- { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
- setlocale(LC_ALL,'');
- { load iconv library and iconvctl function }
- iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix);
- if iconvlib=0 then
- iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix+'.6');
- if iconvlib<>0 then
- pointer(iconvctl):=GetProcAddress(iconvlib,iconvctlname);
- { set the DefaultSystemCodePage }
- DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
- DefaultFileSystemCodePage:=GetStandardCodePage(scpFileSystemSingleByte);
- DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage;
- {$ifdef FPC_HAS_CPSTRING}
- SetStdIOCodePages;
- {$endif FPC_HAS_CPSTRING}
- { init conversion tables for main program }
- InitThread;
- finalization
- { fini conversion tables for main program }
- FiniThread;
- { unload iconv library }
- if iconvlib<>0 then
- FreeLibrary(iconvlib);
- { restore previous (probably default) widestring manager so that subsequent calls
- into the widestring manager won't trigger the finalized functionality }
- SetWideStringManager(OrgWideStringManager);
- end.
|