| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206 | {    This file is part of the Free Pascal run time library.    Copyright (c) 2005 by Florian Klaempfl,    member of the Free Pascal development team.    libc based wide string support    See the file COPYING.FPC, included in this distribution,    for details about the copyright.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. **********************************************************************}{$mode objfpc}{$modeswitch unicodestrings-}{$h-}{$inline on}{$implicitexceptions off}unit cwstring;interfaceprocedure SetCWidestringManager;implementation{$linklib c}// Linux (and maybe glibc platforms in general), have iconv in glibc.{$if defined(linux) or defined(solaris)}  {$define iconv_is_in_libc}{$endif}{$ifdef netbsd}  {$ifndef DISABLE_ICONV_LIBC}    {$define iconv_is_in_libc}  {$endif}{$endif}{ Modern FreeBSD releases also claim to have iconv  function inside libc, at least  this is stated explicitly in iconv(3) man  of FreeBSD from 10.0 release  PM 2023-05-05 }{$ifdef freebsd}  {$ifndef DISABLE_ICONV_LIBC}    {$define iconv_is_in_libc}  {$endif}{$endif}{$ifndef iconv_is_in_libc} {$if defined(haiku)}   {$linklib textencoding} {$else}   {$linklib iconv} {$endif} {$define useiconv}{$endif not iconv_is_in_libc}{$i rtldefs.inc}Uses  BaseUnix,  ctypes,  unix,  unixtype,  initc,  dynlibs,  unixcp;Const{$ifndef useiconv}    libiconvname='c';  // is in libc under Linux.    libprefix='lib';{$else}  {$ifdef haiku}    libiconvname='textencoding';  // is in libtextencoding under Haiku    libprefix='lib';  {$else}    {$if defined(darwin) or defined(aix)}      libiconvname='libiconv';      libprefix='';    {$else}      libiconvname='iconv';      libprefix='lib';    {$endif}  {$endif}{$endif}{ helper functions from libc }function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';function strcoll (__s1:PAnsiChar; __s2:PAnsiChar):cint;cdecl;external clib name 'strcoll';{$ifdef netbsd}  { NetBSD has a new setlocale function defined in /usr/include/locale.h    that should be used }function setlocale(category: cint; locale: PAnsiChar): PAnsiChar; cdecl; external clib name '__setlocale50';{$else}function setlocale(category: cint; locale: PAnsiChar): PAnsiChar; cdecl; external clib name 'setlocale';{$endif}{$if not(defined(beos) and not defined(haiku))}function mbrtowc(pwc: pwchar_t; const s: PAnsiChar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';function wcrtomb(s: PAnsiChar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';function mbrlen(const s: PAnsiChar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';{$else beos}function mbtowc(pwc: pwchar_t; const s: PAnsiChar; n: size_t): size_t; cdecl; external clib name 'mbtowc';function wctomb(s: PAnsiChar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';function mblen(const s: PAnsiChar; n: size_t): size_t; cdecl; external clib name 'mblen';{$endif beos}const{$if defined(linux)}  __LC_CTYPE = 0;  LC_ALL = 6;  _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);  _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;  CODESET = _NL_CTYPE_CODESET_NAME;{$elseif defined(darwin)}  CODESET = 0;  LC_ALL = 0;{$elseif defined(FreeBSD)} // actually FreeBSD5. internationalisation is afaik not default on 4.  __LC_CTYPE = 0;  LC_ALL = 0;  _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);  _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;  CODESET = 0; // _NL_CTYPE_CODESET_NAME;{$elseif defined(solaris)}  {$define ACCEPT_646}  CODESET=49;  LC_ALL = 6;{$elseif defined(beos)}  {$ifdef haiku}  CODESET= 0; // Checked for Haiku  LC_ALL = 0; // Checked for Haiku  {$else}  {$warning check correct value for BeOS}  CODESET=49;  LC_ALL = 6; // Checked for BeOS  {$endif}  ESysEILSEQ = EILSEQ;{$elseif defined(OpenBSD)}  {$define ACCEPT_646}  CODESET = 51;  LC_ALL = 0;{$elseif defined(NetBSD)}  {$define ACCEPT_646}  CODESET = 51;  LC_ALL = 0;{$elseif defined(aix)}  CODESET = 49;  LC_ALL = -1;{$elseif defined(dragonfly)}  CODESET = 0;  LC_ALL = 0;  __LC_CTYPE = 0;  _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);  _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;{$else not aix}{$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }// and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_{$endif}{ unicode encoding name }{$ifdef FPC_LITTLE_ENDIAN}  unicode_encoding2 = 'UTF-16LE';  unicode_encoding4 = 'UCS-4LE';{$else  FPC_LITTLE_ENDIAN}{$ifdef AIX}  unicode_encoding2 = 'UTF-16';{$else AIX}  unicode_encoding2 = 'UTF-16BE';  unicode_encoding4 = 'UCS-4BE';{$endif AIX}{$endif  FPC_LITTLE_ENDIAN}{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4   }{ -> 10 should be enough? Should actually use MB_CUR_MAX, but }{ that's a libc macro mapped to internal functions/variables  }{ and thus not a stable external API on systems where libc    }{ breaks backwards compatibility every now and then           }  MB_CUR_MAX = 10;{ Requests for iconvctl }  ICONV_TRIVIALP          = 0; // int *argument  ICONV_GET_TRANSLITERATE = 1; // int *argument  ICONV_SET_TRANSLITERATE = 2; // const int *argument  ICONV_GET_DISCARD_ILSEQ = 3; // int *argument  ICONV_SET_DISCARD_ILSEQ = 4; // const int *argument  ICONV_SET_HOOKS         = 5; // const struct iconv_hooks *argument  ICONV_SET_FALLBACKS     = 6; // const struct iconv_fallbacks *argumenttype  piconv_t = ^iconv_t;  iconv_t = pointer;  nl_item = cint;{$ifdef haiku}  function nl_langinfo(__item:nl_item):PAnsiChar;cdecl;external 'root' name 'nl_langinfo';{$else}  {$ifndef beos}  function nl_langinfo(__item:nl_item):PAnsiChar;cdecl;external libiconvname name 'nl_langinfo';  {$endif}{$endif}{$if (not defined(bsd) and not defined(beos)) or defined(iconv_is_in_libc) or (defined(darwin) and not defined(cpupowerpc32))}function iconv_open(__tocode:PAnsiChar; __fromcode:PAnsiChar):iconv_t;cdecl;external libiconvname name 'iconv_open';function iconv(__cd:iconv_t; __inbuf:PPAnsiChar; __inbytesleft:psize_t; __outbuf:PPAnsiChar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';const  iconvctlname='iconvctl';{$else}function iconv_open(__tocode:PAnsiChar; __fromcode:PAnsiChar):iconv_t;cdecl;external libiconvname name 'libiconv_open';function iconv(__cd:iconv_t; __inbuf:PPAnsiChar; __inbytesleft:psize_t; __outbuf:PPAnsiChar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';const  iconvctlname='libiconvctl';{$endif}var  iconvctl:function(__cd:iconv_t; __request:cint; __argument:pointer):cint;cdecl;procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];Const  UTF8Name : ShortString = 'UTF-8';  TransLitname : ShortString ='//TRANSLIT';threadvar  iconv_ansi2wide,  iconv_wide2ansi : iconv_t;  { since we cache the iconv_t converters, we have to do the same    for the DefaultSystemCodePage variable since if it changes, we    have to re-initialize the converters too. We can't do that via    a callback in the widestring manager because DefaultSystemCodePage    is not a threadvar and we can't automatically change this in all    threads }  current_DefaultSystemCodePage: TSystemCodePage;procedure InitThread;var  transliterate: cint;{$if not(defined(darwin) and (defined(cpuarm) or defined(cpuaarch64))) and not defined(iphonesim)}  iconvindex: longint;{$endif}  iconvname, toencoding: shortstring;begin  current_DefaultSystemCodePage:=DefaultSystemCodePage;{$if declared(iconvindex)}  iconvindex:=GetCodepageData(DefaultSystemCodePage);  if iconvindex<>-1 then    iconvname:=UnixCpMap[iconvindex].name  else    { default to UTF-8 on Unix platforms }    iconvname:=UTF8Name;{$else}  { Unix locale settings are ignored on iPhoneOS/iPhoneSimulator }  iconvname:='UTF-8';{$endif}  toencoding:=iconvname;  if not assigned(iconvctl) then    toencoding:=toencoding+TransLitName;  iconv_wide2ansi:=iconv_open(PAnsiChar(@toencoding[1]),unicode_encoding2);  iconv_ansi2wide:=iconv_open(unicode_encoding2,PAnsiChar(@iconvname[1]));  if assigned(iconvctl) and     (iconv_wide2ansi<>iconv_t(-1)) then  begin    transliterate:=1;    iconvctl(iconv_wide2ansi,ICONV_SET_TRANSLITERATE,@transliterate);  end;end;procedure FiniThread;begin  if (iconv_wide2ansi <> iconv_t(-1)) then    iconv_close(iconv_wide2ansi);  if (iconv_ansi2wide <> iconv_t(-1)) then    iconv_close(iconv_ansi2wide);end;{$if defined(beos) and not defined(haiku)}function nl_langinfo(__item:nl_item):PAnsiChar;begin  {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}  // Now implement the minimum required to correctly initialize WideString support  case __item of    CODESET : Result := 'UTF-8'; // BeOS use UTF-8    else    begin      Assert(False, 'nl_langinfo was called with an unknown nl_item value');      Result := '';    end;  end;end;{$endif}function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: PAnsiChar; cp_is_from: boolean): iconv_t;  var    iconvindex: longint;    toencoding: rawbytestring;    transliterate: cint;  begin    { TODO: add caching (then we also don't need separate code for      the default system page and other ones)      -- typecasting an ansistring function result to PAnsiChar is        unsafe normally, but these are constant strings -> no        problem }    open_iconv_for_cps:=iconv_t(-1);    iconvindex:=GetCodepageData(cp);    if iconvindex=-1 then      exit;    repeat      if cp_is_from then        open_iconv_for_cps:=iconv_open(otherencoding,PAnsiChar(UnixCpMap[iconvindex].name))      else      begin        toencoding:=UnixCpMap[iconvindex].name;        if not assigned(iconvctl) then          toencoding:=toencoding+'//TRANSLIT';        open_iconv_for_cps:=iconv_open(PAnsiChar(toencoding),otherencoding);      end;      inc(iconvindex);    until (open_iconv_for_cps<>iconv_t(-1)) or          (iconvindex>high(UnixCpMap)) or          (UnixCpMap[iconvindex].cp<>cp);    if not cp_is_from and      (open_iconv_for_cps<>iconv_t(-1)) and      assigned(iconvctl) then    begin      transliterate:=1;      iconvctl(open_iconv_for_cps,ICONV_SET_TRANSLITERATE,@transliterate);    end;  end;{$ifdef aix}{$i cwstraix.inc}{$endif aix}procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);  var    outlength,    outoffset,    srclen,    outleft : size_t;    use_iconv: iconv_t;    srcpos : pwidechar;    destpos: PAnsiChar;    mynil : PAnsiChar;    my0 : size_t;    err : longint;    transliterate: cint;    free_iconv: boolean;{$ifdef aix}    intermediate: rawbytestring;{$endif aix}  begin{$ifdef aix}    { AIX libiconv does not support converting cp866 to anything else except      for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }    if cp=866 then      begin        Wide2AnsiMove(source,intermediate,28595,len);        if handle_aix_intermediate(PAnsiChar(intermediate),28595,cp,dest,len) then          exit;      end;{$endif aix}    if (cp=DefaultSystemCodePage) then      begin        { update iconv converter in case the DefaultSystemCodePage has been          changed }        if current_DefaultSystemCodePage<>DefaultSystemCodePage then          begin            FiniThread;            InitThread;          end;        use_iconv:=iconv_wide2ansi;        free_iconv:=false;      end    else      begin        use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);        if (use_iconv<>iconv_t(-1)) and           assigned(iconvctl) then        begin          transliterate:=1;          iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);        end;        free_iconv:=true;      end;    { unsupported encoding -> default move }    if use_iconv=iconv_t(-1) then      begin        DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);        exit;      end;    mynil:=nil;    my0:=0;    { rought estimation }    setlength(dest,len*3);    outlength:=len*3;    srclen:=len*2;    srcpos:=source;    destpos:=PAnsiChar(dest);    outleft:=outlength;    while iconv(use_iconv,PPAnsiChar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do      begin        err:=fpgetCerrno;        case err of          { last character is incomplete sequence }          ESysEINVAL,          { incomplete sequence in the middle }          ESysEILSEQ:            begin              { skip and set to '?' }              inc(srcpos);              dec(srclen,2);              destpos^:='?';              inc(destpos);              dec(outleft);              { reset }              iconv(use_iconv,@mynil,@my0,@mynil,@my0);              if err=ESysEINVAL then                break;            end;          ESysE2BIG:            begin              outoffset:=destpos-PAnsiChar(dest);              { extend }              setlength(dest,outlength+len*3);              inc(outleft,len*3);              inc(outlength,len*3);              { string could have been moved }              destpos:=PAnsiChar(dest)+outoffset;            end;          else            runerror(231);        end;      end;    // truncate string    setlength(dest,length(dest)-outleft);    SetCodePage(dest,cp,false);    if free_iconv then      iconv_close(use_iconv);  end;procedure Ansi2WideMove(source:PAnsiChar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);  var    outlength,    outoffset,    outleft : size_t;    use_iconv: iconv_t;    srcpos,    destpos: PAnsiChar;    mynil : PAnsiChar;    my0 : size_t;    err: cint;    iconvindex: longint;    free_iconv: boolean;{$ifdef aix}    intermediate: rawbytestring;{$endif aix}  begin{$ifdef aix}    { AIX libiconv does not support converting cp866 to anything else except      for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }    if cp=866 then      begin        if handle_aix_intermediate(source,cp,cp,intermediate,len) then          source:=PAnsiChar(intermediate);      end;{$endif aix}    if (cp=DefaultSystemCodePage) then      begin        { update iconv converter in case the DefaultSystemCodePage has been          changed }        if current_DefaultSystemCodePage<>DefaultSystemCodePage then          begin            FiniThread;            InitThread;          end;        use_iconv:=iconv_ansi2wide;        free_iconv:=false;        // Using Unicode RTL, we can end up here when iconv_ansi2wide is not yet initialized        if (use_iconv=Nil) then          begin          DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);          exit;          end;      end    else      begin        { TODO: add caching (then we also don't need separate code for          the default system page and other ones)          -- typecasting an ansistring function result to PAnsiChar is            unsafe normally, but these are constant strings -> no            problem }        use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);        free_iconv:=true;      end;    { unsupported encoding -> default move }    if use_iconv=iconv_t(-1) then      begin        DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);        exit;      end;    mynil:=nil;    my0:=0;    // extra space    outlength:=len+1;    setlength(dest,outlength);    srcpos:=source;    destpos:=PAnsiChar(dest);    outleft:=outlength*2;    while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do      begin        err:=fpgetCerrno;        case err of         ESysEINVAL,         ESysEILSEQ:            begin              { skip and set to '?' }              inc(srcpos);              dec(len);              pwidechar(destpos)^:='?';              inc(destpos,2);              dec(outleft,2);              { reset }              iconv(use_iconv,@mynil,@my0,@mynil,@my0);              if err=ESysEINVAL then                break;            end;          ESysE2BIG:            begin              outoffset:=destpos-PAnsiChar(dest);              { extend }              setlength(dest,outlength+len);              inc(outleft,len*2);              inc(outlength,len);              { string could have been moved }              destpos:=PAnsiChar(dest)+outoffset;            end;          else            runerror(231);        end;      end;    // truncate string    setlength(dest,length(dest)-outleft div 2);    if free_iconv then      iconv_close(use_iconv);  end;function LowerWideString(const s : WideString) : WideString;  var    i : SizeInt;  begin    SetLength(result,length(s));    for i:=0 to length(s)-1 do      pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));  end;function UpperWideString(const s : WideString) : WideString;  var    i : SizeInt;  begin    SetLength(result,length(s));    for i:=0 to length(s)-1 do      pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));  end;procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;begin  if (len>length(s)) then    if (length(s) < 10*256) then      setlength(s,length(s)+10)    else      setlength(s,length(s)+length(s) shr 8);end;procedure ConcatCharToAnsiStr(const c: AnsiChar; var S: AnsiString; var index: SizeInt);begin  EnsureAnsiLen(s,index);  PAnsiChar(@s[index])^:=c;  inc(index);end;{ concatenates an utf-32 Char to a ansistring. S *must* be unique when entering. }{$if not(defined(beos) and not defined(haiku))}procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);{$else not beos}procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);{$endif beos}var  p     : PAnsiChar;  mblen : size_t;begin  { we know that s is unique -> avoid uniquestring calls}  p:=@s[index];  if (nc<=127) then    ConcatCharToAnsiStr(AnsiChar(nc),s,index)  else    begin      EnsureAnsiLen(s,index+MB_CUR_MAX);{$if not(defined(beos) and not defined(haiku))}      mblen:=wcrtomb(p,wchar_t(nc),@mbstate);{$else not beos}      mblen:=wctomb(p,wchar_t(nc));{$endif not beos}      if (mblen<>size_t(-1)) then        inc(index,mblen)      else        begin          { invalid wide AnsiChar }          p^:='?';          inc(index);        end;    end;end;function LowerAnsiString(const s : AnsiString) : AnsiString;  var    i, slen,    resindex : SizeInt;    mblen    : size_t;{$if not(defined(beos) and not defined(haiku))}    ombstate,    nmbstate : mbstate_t;{$endif beos}    wc       : wchar_t;  begin{$if not(defined(beos) and not defined(haiku))}    fillchar(ombstate,sizeof(ombstate),0);    fillchar(nmbstate,sizeof(nmbstate),0);{$endif beos}    slen:=length(s);    SetLength(result,slen+10);    i:=1;    resindex:=1;    while (i<=slen) do      begin        if (s[i]<=#127) then          begin            wc:=wchar_t(s[i]);            mblen:= 1;          end        else{$if not(defined(beos) and not defined(haiku))}          mblen:=mbrtowc(@wc, PAnsiChar(@s[i]), slen-i+1, @ombstate);{$else not beos}          mblen:=mbtowc(@wc, PAnsiChar(@s[i]), slen-i+1);{$endif not beos}        case mblen of          size_t(-2):            begin              { partial invalid character, copy literally }              while (i<=slen) do                begin                  ConcatCharToAnsiStr(s[i],result,resindex);                  inc(i);                end;            end;          size_t(-1), 0:            begin              { invalid or null character }              ConcatCharToAnsiStr(s[i],result,resindex);              inc(i);            end;          else            begin              { a valid sequence }              { even if mblen = 1, the lowercase version may have a }              { different length                                     }              { We can't do anything special if wchar_t is 16 bit... }{$if not(defined(beos) and not defined(haiku))}              ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);{$else not beos}              ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);{$endif not beos}              inc(i,mblen);            end;          end;      end;    SetLength(result,resindex-1);  end;function UpperAnsiString(const s : AnsiString) : AnsiString;  var    i, slen,    resindex : SizeInt;    mblen    : size_t;{$if not(defined(beos) and not defined(haiku))}    ombstate,    nmbstate : mbstate_t;{$endif beos}    wc       : wchar_t;  begin{$if not(defined(beos) and not defined(haiku))}    fillchar(ombstate,sizeof(ombstate),0);    fillchar(nmbstate,sizeof(nmbstate),0);{$endif beos}    slen:=length(s);    SetLength(result,slen+10);    i:=1;    resindex:=1;    while (i<=slen) do      begin        if (s[i]<=#127) then          begin            wc:=wchar_t(s[i]);            mblen:= 1;          end        else{$if not(defined(beos) and not defined(haiku))}          mblen:=mbrtowc(@wc, PAnsiChar(@s[i]), slen-i+1, @ombstate);{$else not beos}          mblen:=mbtowc(@wc, PAnsiChar(@s[i]), slen-i+1);{$endif beos}        case mblen of          size_t(-2):            begin              { partial invalid character, copy literally }              while (i<=slen) do                begin                  ConcatCharToAnsiStr(s[i],result,resindex);                  inc(i);                end;            end;          size_t(-1), 0:            begin              { invalid or null character }              ConcatCharToAnsiStr(s[i],result,resindex);              inc(i);            end;          else            begin              { a valid sequence }              { even if mblen = 1, the uppercase version may have a }              { different length                                     }              { We can't do anything special if wchar_t is 16 bit... }{$if not(defined(beos) and not defined(haiku))}              ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);{$else not beos}              ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);{$endif not beos}              inc(i,mblen);            end;          end;      end;    SetLength(result,resindex-1);  end;function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;  var    i, slen,    destindex : SizeInt;    uch       : UCS4Char;  begin    slen:=length(s);    setlength(result,slen+1);    i:=1;    destindex:=0;    while (i<=slen) do      begin        uch:=UCS4Char(s[i]);        if (uch=0) then          result[destindex]:=32        else if (uch<=$d7ff) or (uch>=$e000) then          result[destindex]:=uch        else if (uch<=$dbff) and          (i<slen) and          (s[i+1]>=#$dc00) and          (s[i+1]<=#$dfff) then          begin            result[destindex]:=(UCS4Char(uch-$d7c0) shl 10)+(UCS4Char(s[i+1]) xor $dc00);            inc(i);          end        else { invalid surrogate pair }          result[destindex]:=uch;        inc(i);        inc(destindex);      end;    result[destindex]:=UCS4Char(0);    { Trimming length in this particular case is just a waste of time,      because result will be interpreted as null-terminated and discarded      almost immediately }  end;function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;{$if not(defined (aix) and defined(cpupowerpc32))}  var    hs1,hs2 : UCS4String;    us1,us2 : WideString;  begin    { wcscoll interprets null chars as end-of-string -> filter out }    if coIgnoreCase in Options then      begin      us1:=UpperWideString(s1);      us2:=UpperWideString(s2);      end    else      begin      us1:=s1;      us2:=s2;      end;    hs1:=WideStringToUCS4StringNoNulls(us1);    hs2:=WideStringToUCS4StringNoNulls(us2);    result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));  end;{$else}  { AIX/PPC32 has a 16 bit wchar_t }  var    i, len: longint;    us1,us2 : WideString;    hs1, hs2: array of widechar;  begin    if coIgnoreCase in Options then      begin      us1:=UpperWideString(s1);      us2:=UpperWideString(s2);      end    else      begin      us1:=s1;      us2:=s2;      end;    len:=length(us1);    setlength(hs1,len+1);    for i:=1 to len do      if us1[i]<>#0 then        hs1[i-1]:=us1[i]      else        hs1[i-1]:=#32;    hs1[len]:=#0;    len:=length(us2);    setlength(hs2,len+1);    for i:=1 to len do      if us2[i]<>#0 then        hs2[i-1]:=us2[i]      else        hs2[i-1]:=#32;    hs2[len]:=#0;    result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));  end;{$endif}{ return value: number of code points in the string. Whenever an invalid  code point is encountered, all characters part of this invalid code point  are considered to form one "character" and the next character is  considered to be the start of a new (possibly also invalid) code point }function CharLengthPChar(const Str: PAnsiChar): PtrInt;  var    nextlen: ptrint;    s: PAnsiChar;{$if not(defined(beos) and not defined(haiku))}    mbstate: mbstate_t;{$endif not beos}  begin    result:=0;    s:=str;{$if not(defined(beos) and not defined(haiku))}    fillchar(mbstate,sizeof(mbstate),0);{$endif not beos}    repeat{$if defined(beos) and not defined(haiku)}      nextlen:=ptrint(mblen(s,MB_CUR_MAX));{$else beos}      nextlen:=ptrint(mbrlen(s,MB_CUR_MAX,@mbstate));{$endif beos}      { skip invalid/incomplete sequences }      if (nextlen<0) then        nextlen:=1;      inc(result,1);      inc(s,nextlen);    until (nextlen=0);  end;function CodePointLength(const Str: PAnsiChar; maxlookahead: ptrint): PtrInt;{$if not(defined(beos) and not defined(haiku))}  var    mbstate: mbstate_t;{$endif not beos}  begin{$if defined(beos) and not defined(haiku)}    result:=ptrint(mblen(str,maxlookahead));{$else beos}    fillchar(mbstate,sizeof(mbstate),0);    result:=ptrint(mbrlen(str,maxlookahead,@mbstate));    { mbrlen can also return -2 for "incomplete but potially valid character      and data has been processed" }    if result<0 then      result:=-1;{$endif beos}  end;function StrCompAnsiIntern(s1,s2 : PAnsiChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;  var    a,b: PAnsiChar;    i: PtrInt;  begin    if not(canmodifys1) then      getmem(a,len1+1)    else      a:=s1;    for i:=0 to len1-1 do      if s1[i]<>#0 then        a[i]:=s1[i]      else        a[i]:=#32;    a[len1]:=#0;    if not(canmodifys2) then      getmem(b,len2+1)    else      b:=s2;    for i:=0 to len2-1 do      if s2[i]<>#0 then        b[i]:=s2[i]      else        b[i]:=#32;    b[len2]:=#0;    result:=strcoll(a,b);    if not(canmodifys1) then      freemem(a);    if not(canmodifys2) then      freemem(b);  end;function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;  begin    result:=StrCompAnsiIntern(PAnsiChar(s1),PAnsiChar(s2),length(s1),length(s2),false,false);  end;function StrCompAnsi(s1,s2 : PAnsiChar): PtrInt;  begin    result:=strcoll(s1,s2);  end;function AnsiCompareText(const S1, S2: ansistring): PtrInt;  var    a, b: AnsiString;  begin    a:=UpperAnsistring(s1);    b:=UpperAnsistring(s2);    result:=StrCompAnsiIntern(PAnsiChar(a),PAnsiChar(b),length(a),length(b),true,true);  end;function AnsiStrIComp(S1, S2: PAnsiChar): PtrInt;  begin    result:=AnsiCompareText(ansistring(s1),ansistring(s2));  end;function AnsiStrLComp(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;  var    a, b: PAnsiChar;begin  if (maxlen=0) then    exit(0);  if (s1[maxlen]<>#0) then    begin      getmem(a,maxlen+1);      move(s1^,a^,maxlen);      a[maxlen]:=#0;    end  else    a:=s1;  if (s2[maxlen]<>#0) then    begin      getmem(b,maxlen+1);      move(s2^,b^,maxlen);      b[maxlen]:=#0;    end  else    b:=s2;  result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);  if (a<>s1) then    freemem(a);  if (b<>s2) then    freemem(b);end;function AnsiStrLIComp(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;  var    a, b: ansistring;begin  if (maxlen=0) then    exit(0);  setlength(a,maxlen);  move(s1^,a[1],maxlen);  setlength(b,maxlen);  move(s2^,b[1],maxlen);  result:=AnsiCompareText(a,b);end;procedure ansi2pchar(const s: ansistring; const orgp: PAnsiChar; out p: PAnsiChar);var  newlen: sizeint;begin  newlen:=length(s);  if newlen>strlen(orgp) then    fpc_rangeerror;  p:=orgp;  if (newlen>0) then    move(s[1],p[0],newlen);  p[newlen]:=#0;end;function AnsiStrLower(Str: PAnsiChar): PAnsiChar;var  temp: ansistring;begin  temp:=loweransistring(str);  ansi2pchar(temp,str,result);end;function AnsiStrUpper(Str: PAnsiChar): PAnsiChar;var  temp: ansistring;begin  temp:=upperansistring(str);  ansi2pchar(temp,str,result);end;function envvarset(const varname: PAnsiChar): boolean;var  varval: PAnsiChar;begin  varval:=fpgetenv(varname);  result:=    assigned(varval) and    (varval[0]<>#0);end;function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;var  langinfo: PAnsiChar;begin{$ifdef FPCRTL_FILESYSTEM_UTF8}  if stdcp=scpFileSystemSingleByte then    begin      result:=CP_UTF8;      exit;    end;{$endif}  { if none of the relevant LC_* environment variables are set, fall back to    UTF-8 (this happens under some versions of OS X for GUI applications, which    otherwise get CP_ASCII) }  if envvarset('LC_ALL') or     envvarset('LC_CTYPE') or     envvarset('LANG') then    begin      langinfo:=nl_langinfo(CODESET);      { there's a bug in the Mac OS X 10.5 libc (based on FreeBSD's)        that causes it to return an empty string of UTF-8 locales        -> patch up (and in general, UTF-8 is a good default on        Unix platforms) }      if not assigned(langinfo) or         (langinfo^=#0) then        langinfo:='UTF-8';      Result:=GetCodepageByName(ansistring(langinfo));    end  else    Result:=unixcp.GetSystemCodepage;end;{$ifdef FPC_HAS_CPSTRING}procedure SetStdIOCodePage(var T: Text); inline;begin  case TextRec(T).Mode of    fmInput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleInput);    fmOutput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleOutput);  end;end;procedure SetStdIOCodePages; inline;begin  SetStdIOCodePage(Input);  SetStdIOCodePage(Output);  SetStdIOCodePage(ErrOutput);  SetStdIOCodePage(StdOut);  SetStdIOCodePage(StdErr);end;{$endif FPC_HAS_CPSTRING}var  OrgWideStringManager: TUnicodeStringManager;Procedure SetCWideStringManager;Var  CWideStringManager : TUnicodeStringManager;begin  GetUnicodeStringManager(OrgWideStringManager);  CWideStringManager:=OrgWideStringManager;  With CWideStringManager do    begin      Wide2AnsiMoveProc:=@Wide2AnsiMove;      Ansi2WideMoveProc:=@Ansi2WideMove;      UpperWideStringProc:=@UpperWideString;      LowerWideStringProc:=@LowerWideString;      CompareWideStringProc:=@CompareWideString;//      CompareTextWideStringProc:=@CompareTextWideString;      CharLengthPCharProc:=@CharLengthPChar;      CodePointLengthProc:=@CodePointLength;      UpperAnsiStringProc:=@UpperAnsiString;      LowerAnsiStringProc:=@LowerAnsiString;      CompareStrAnsiStringProc:=@CompareStrAnsiString;      CompareTextAnsiStringProc:=@AnsiCompareText;      StrCompAnsiStringProc:=@StrCompAnsi;      StrICompAnsiStringProc:=@AnsiStrIComp;      StrLCompAnsiStringProc:=@AnsiStrLComp;      StrLICompAnsiStringProc:=@AnsiStrLIComp;      StrLowerAnsiStringProc:=@AnsiStrLower;      StrUpperAnsiStringProc:=@AnsiStrUpper;      ThreadInitProc:=@InitThread;      ThreadFiniProc:=@FiniThread;      { Unicode }      Unicode2AnsiMoveProc:=@Wide2AnsiMove;      Ansi2UnicodeMoveProc:=@Ansi2WideMove;      UpperUnicodeStringProc:=@UpperWideString;      LowerUnicodeStringProc:=@LowerWideString;      CompareUnicodeStringProc:=@CompareWideString;      { CodePage }      GetStandardCodePageProc:=@GetStandardCodePage;    end;  SetUnicodeStringManager(CWideStringManager);end;var  iconvlib:TLibHandle;initialization  SetCWideStringManager;  { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff  }  { with the information from the environment variables according to POSIX  }  { (some OSes do this automatically, but e.g. Darwin and Solaris don't)    }  setlocale(LC_ALL,'');  { load iconv library and iconvctl function }  iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix);  if iconvlib=0 then    iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix+'.6');  if iconvlib<>0 then    pointer(iconvctl):=GetProcAddress(iconvlib,iconvctlname);  { set the DefaultSystemCodePage }  DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);  DefaultFileSystemCodePage:=GetStandardCodePage(scpFileSystemSingleByte);  DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage;  {$ifdef FPC_HAS_CPSTRING}  SetStdIOCodePages;  {$endif FPC_HAS_CPSTRING}  { init conversion tables for main program }  InitThread;finalization  { fini conversion tables for main program }  FiniThread;  { unload iconv library }  if iconvlib<>0 then    FreeLibrary(iconvlib);  { restore previous (probably default) widestring manager so that subsequent calls    into the widestring manager won't trigger the finalized functionality }  SetWideStringManager(OrgWideStringManager);end.
 |