cwstring.pp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$linklib c}
  19. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  20. {$if defined(haiku)}
  21. {$linklib textencoding}
  22. {$linklib locale}
  23. {$else}
  24. {$linklib iconv}
  25. {$endif}
  26. {$define useiconv}
  27. {$endif linux}
  28. Uses
  29. BaseUnix,
  30. ctypes,
  31. unix,
  32. unixtype,
  33. initc;
  34. Const
  35. {$ifndef useiconv}
  36. libiconvname='c'; // is in libc under Linux.
  37. {$else}
  38. {$ifdef haiku}
  39. libiconvname='textencoding'; // is in libtextencoding under Haiku
  40. {$else}
  41. libiconvname='iconv';
  42. {$endif}
  43. {$endif}
  44. { helper functions from libc }
  45. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  46. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  47. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  48. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  49. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  50. {$ifndef beos}
  51. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  52. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  53. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  54. {$else beos}
  55. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  56. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  57. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  58. {$endif beos}
  59. const
  60. {$ifdef linux}
  61. __LC_CTYPE = 0;
  62. LC_ALL = 6;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = _NL_CTYPE_CODESET_NAME;
  66. {$else linux}
  67. {$ifdef darwin}
  68. CODESET = 0;
  69. LC_ALL = 0;
  70. {$else darwin}
  71. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  72. __LC_CTYPE = 0;
  73. LC_ALL = 0;
  74. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  75. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  76. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  77. {$else freebsd}
  78. {$ifdef solaris}
  79. CODESET=49;
  80. LC_ALL = 6;
  81. {$else solaris}
  82. {$ifdef beos}
  83. {$warning check correct value for BeOS}
  84. CODESET=49;
  85. {$ifdef haiku}
  86. LC_ALL = 0; // Checked for Haiku
  87. {$else}
  88. LC_ALL = 6; // Checked for BeOS
  89. {$endif}
  90. ESysEILSEQ = EILSEQ;
  91. {$else}
  92. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  93. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  94. {$endif beos}
  95. {$endif solaris}
  96. {$endif FreeBSD}
  97. {$endif darwin}
  98. {$endif linux}
  99. { unicode encoding name }
  100. {$ifdef FPC_LITTLE_ENDIAN}
  101. unicode_encoding2 = 'UTF-16LE';
  102. unicode_encoding4 = 'UCS-4LE';
  103. {$else FPC_LITTLE_ENDIAN}
  104. unicode_encoding2 = 'UTF-16BE';
  105. unicode_encoding4 = 'UCS-4BE';
  106. {$endif FPC_LITTLE_ENDIAN}
  107. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  108. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  109. { that's a libc macro mapped to internal functions/variables }
  110. { and thus not a stable external API on systems where libc }
  111. { breaks backwards compatibility every now and then }
  112. MB_CUR_MAX = 10;
  113. type
  114. piconv_t = ^iconv_t;
  115. iconv_t = pointer;
  116. nl_item = cint;
  117. {$ifdef haiku}
  118. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  119. {$else}
  120. {$ifndef beos}
  121. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  122. {$endif}
  123. {$endif}
  124. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32))}
  125. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  126. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  127. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  128. {$else}
  129. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  130. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  131. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  132. {$endif}
  133. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  134. threadvar
  135. iconv_ansi2wide,
  136. iconv_wide2ansi : iconv_t;
  137. { since we cache the iconv_t converters, we have to do the same
  138. for the DefaultSystemCodePage variable since if it changes, we
  139. have to re-initialize the converters too. We can't do that via
  140. a callback in the widestring manager because DefaultSystemCodePage
  141. is not a threadvar and we can't automatically change this in all
  142. threads }
  143. current_DefaultSystemCodePage: TSystemCodePage;
  144. procedure InitThread;
  145. {$if not(defined(darwin) and defined(arm))}
  146. var
  147. iconvname: rawbytestring;
  148. {$endif}
  149. begin
  150. current_DefaultSystemCodePage:=DefaultSystemCodePage;
  151. {$if not(defined(darwin) and defined(arm))}
  152. iconvname:=CodePageToCodePageName(DefaultSystemCodePage);
  153. iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
  154. iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
  155. {$else}
  156. { Unix locale settings are ignored on iPhoneOS }
  157. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  158. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  159. {$endif}
  160. end;
  161. procedure FiniThread;
  162. begin
  163. if (iconv_wide2ansi <> iconv_t(-1)) then
  164. iconv_close(iconv_wide2ansi);
  165. if (iconv_ansi2wide <> iconv_t(-1)) then
  166. iconv_close(iconv_ansi2wide);
  167. end;
  168. {$i winiconv}
  169. {$if defined(beos) and not defined(haiku)}
  170. function nl_langinfo(__item:nl_item):pchar;
  171. begin
  172. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  173. // Now implement the minimum required to correctly initialize WideString support
  174. case __item of
  175. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  176. else
  177. begin
  178. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  179. Result := '';
  180. end;
  181. end;
  182. end;
  183. {$endif}
  184. procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
  185. var
  186. outlength,
  187. outoffset,
  188. srclen,
  189. outleft : size_t;
  190. use_iconv: iconv_t;
  191. srcpos : pwidechar;
  192. destpos: pchar;
  193. mynil : pchar;
  194. my0 : size_t;
  195. err: cint;
  196. free_iconv: boolean;
  197. begin
  198. if (cp=DefaultSystemCodePage) then
  199. begin
  200. { update iconv converter in case the DefaultSystemCodePage has been
  201. changed }
  202. if current_DefaultSystemCodePage<>DefaultSystemCodePage then
  203. begin
  204. FiniThread;
  205. InitThread;
  206. end;
  207. use_iconv:=iconv_wide2ansi;
  208. free_iconv:=false;
  209. end
  210. else
  211. begin
  212. { TODO: add caching (then we also don't need separate code for
  213. the default system page and other ones)
  214. -- typecasting an ansistring function result to pchar is
  215. unsafe normally, but these are constant strings -> no
  216. problem }
  217. use_iconv:=iconv_open(pchar(CodePageToCodePageName(cp)),unicode_encoding2);
  218. free_iconv:=true;
  219. end;
  220. { unsupported encoding -> default move }
  221. if use_iconv=iconv_t(-1) then
  222. begin
  223. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  224. exit;
  225. end;
  226. mynil:=nil;
  227. my0:=0;
  228. { rought estimation }
  229. setlength(dest,len*3);
  230. SetCodePage(dest,cp,false);
  231. outlength:=len*3;
  232. srclen:=len*2;
  233. srcpos:=source;
  234. destpos:=pchar(dest);
  235. outleft:=outlength;
  236. while iconv(use_iconv,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  237. begin
  238. err:=fpgetCerrno;
  239. case err of
  240. { last character is incomplete sequence }
  241. ESysEINVAL,
  242. { incomplete sequence in the middle }
  243. ESysEILSEQ:
  244. begin
  245. { skip and set to '?' }
  246. inc(srcpos);
  247. dec(srclen,2);
  248. destpos^:='?';
  249. inc(destpos);
  250. dec(outleft);
  251. { reset }
  252. iconv(use_iconv,@mynil,@my0,@mynil,@my0);
  253. if err=ESysEINVAL then
  254. break;
  255. end;
  256. ESysE2BIG:
  257. begin
  258. outoffset:=destpos-pchar(dest);
  259. { extend }
  260. setlength(dest,outlength+len*3);
  261. inc(outleft,len*3);
  262. inc(outlength,len*3);
  263. { string could have been moved }
  264. destpos:=pchar(dest)+outoffset;
  265. end;
  266. else
  267. runerror(231);
  268. end;
  269. end;
  270. // truncate string
  271. setlength(dest,length(dest)-outleft);
  272. if free_iconv then
  273. iconv_close(use_iconv);
  274. end;
  275. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  276. var
  277. outlength,
  278. outoffset,
  279. outleft : size_t;
  280. use_iconv: iconv_t;
  281. srcpos,
  282. destpos: pchar;
  283. mynil : pchar;
  284. my0 : size_t;
  285. err: cint;
  286. free_iconv: boolean;
  287. begin
  288. if (cp=DefaultSystemCodePage) then
  289. begin
  290. { update iconv converter in case the DefaultSystemCodePage has been
  291. changed }
  292. if current_DefaultSystemCodePage<>DefaultSystemCodePage then
  293. begin
  294. FiniThread;
  295. InitThread;
  296. end;
  297. use_iconv:=iconv_ansi2wide;
  298. free_iconv:=false;
  299. end
  300. else
  301. begin
  302. { TODO: add caching (then we also don't need separate code for
  303. the default system page and other ones)
  304. -- typecasting an ansistring function result to pchar is
  305. unsafe normally, but these are constant strings -> no
  306. problem }
  307. use_iconv:=iconv_open(unicode_encoding2,pchar(CodePageToCodePageName(cp)));
  308. free_iconv:=true;
  309. end;
  310. { unsupported encoding -> default move }
  311. if use_iconv=iconv_t(-1) then
  312. begin
  313. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  314. exit;
  315. end;
  316. mynil:=nil;
  317. my0:=0;
  318. // extra space
  319. outlength:=len+1;
  320. setlength(dest,outlength);
  321. srcpos:=source;
  322. destpos:=pchar(dest);
  323. outleft:=outlength*2;
  324. while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  325. begin
  326. err:=fpgetCerrno;
  327. case err of
  328. ESysEINVAL,
  329. ESysEILSEQ:
  330. begin
  331. { skip and set to '?' }
  332. inc(srcpos);
  333. dec(len);
  334. pwidechar(destpos)^:='?';
  335. inc(destpos,2);
  336. dec(outleft,2);
  337. { reset }
  338. iconv(use_iconv,@mynil,@my0,@mynil,@my0);
  339. if err=ESysEINVAL then
  340. break;
  341. end;
  342. ESysE2BIG:
  343. begin
  344. outoffset:=destpos-pchar(dest);
  345. { extend }
  346. setlength(dest,outlength+len);
  347. inc(outleft,len*2);
  348. inc(outlength,len);
  349. { string could have been moved }
  350. destpos:=pchar(dest)+outoffset;
  351. end;
  352. else
  353. runerror(231);
  354. end;
  355. end;
  356. // truncate string
  357. setlength(dest,length(dest)-outleft div 2);
  358. if free_iconv then
  359. iconv_close(use_iconv);
  360. end;
  361. function LowerWideString(const s : WideString) : WideString;
  362. var
  363. i : SizeInt;
  364. begin
  365. SetLength(result,length(s));
  366. for i:=0 to length(s)-1 do
  367. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  368. end;
  369. function UpperWideString(const s : WideString) : WideString;
  370. var
  371. i : SizeInt;
  372. begin
  373. SetLength(result,length(s));
  374. for i:=0 to length(s)-1 do
  375. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  376. end;
  377. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  378. begin
  379. if (len>length(s)) then
  380. if (length(s) < 10*256) then
  381. setlength(s,length(s)+10)
  382. else
  383. setlength(s,length(s)+length(s) shr 8);
  384. end;
  385. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  386. begin
  387. EnsureAnsiLen(s,index);
  388. pchar(@s[index])^:=c;
  389. inc(index);
  390. end;
  391. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  392. {$ifndef beos}
  393. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  394. {$else not beos}
  395. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  396. {$endif beos}
  397. var
  398. p : pchar;
  399. mblen : size_t;
  400. begin
  401. { we know that s is unique -> avoid uniquestring calls}
  402. p:=@s[index];
  403. if (nc<=127) then
  404. ConcatCharToAnsiStr(char(nc),s,index)
  405. else
  406. begin
  407. EnsureAnsiLen(s,index+MB_CUR_MAX);
  408. {$ifndef beos}
  409. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  410. {$else not beos}
  411. mblen:=wctomb(p,wchar_t(nc));
  412. {$endif not beos}
  413. if (mblen<>size_t(-1)) then
  414. inc(index,mblen)
  415. else
  416. begin
  417. { invalid wide char }
  418. p^:='?';
  419. inc(index);
  420. end;
  421. end;
  422. end;
  423. function LowerAnsiString(const s : AnsiString) : AnsiString;
  424. var
  425. i, slen,
  426. resindex : SizeInt;
  427. mblen : size_t;
  428. {$ifndef beos}
  429. ombstate,
  430. nmbstate : mbstate_t;
  431. {$endif beos}
  432. wc : wchar_t;
  433. begin
  434. {$ifndef beos}
  435. fillchar(ombstate,sizeof(ombstate),0);
  436. fillchar(nmbstate,sizeof(nmbstate),0);
  437. {$endif beos}
  438. slen:=length(s);
  439. SetLength(result,slen+10);
  440. i:=1;
  441. resindex:=1;
  442. while (i<=slen) do
  443. begin
  444. if (s[i]<=#127) then
  445. begin
  446. wc:=wchar_t(s[i]);
  447. mblen:= 1;
  448. end
  449. else
  450. {$ifndef beos}
  451. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  452. {$else not beos}
  453. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  454. {$endif not beos}
  455. case mblen of
  456. size_t(-2):
  457. begin
  458. { partial invalid character, copy literally }
  459. while (i<=slen) do
  460. begin
  461. ConcatCharToAnsiStr(s[i],result,resindex);
  462. inc(i);
  463. end;
  464. end;
  465. size_t(-1), 0:
  466. begin
  467. { invalid or null character }
  468. ConcatCharToAnsiStr(s[i],result,resindex);
  469. inc(i);
  470. end;
  471. else
  472. begin
  473. { a valid sequence }
  474. { even if mblen = 1, the lowercase version may have a }
  475. { different length }
  476. { We can't do anything special if wchar_t is 16 bit... }
  477. {$ifndef beos}
  478. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  479. {$else not beos}
  480. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  481. {$endif not beos}
  482. inc(i,mblen);
  483. end;
  484. end;
  485. end;
  486. SetLength(result,resindex-1);
  487. end;
  488. function UpperAnsiString(const s : AnsiString) : AnsiString;
  489. var
  490. i, slen,
  491. resindex : SizeInt;
  492. mblen : size_t;
  493. {$ifndef beos}
  494. ombstate,
  495. nmbstate : mbstate_t;
  496. {$endif beos}
  497. wc : wchar_t;
  498. begin
  499. {$ifndef beos}
  500. fillchar(ombstate,sizeof(ombstate),0);
  501. fillchar(nmbstate,sizeof(nmbstate),0);
  502. {$endif beos}
  503. slen:=length(s);
  504. SetLength(result,slen+10);
  505. i:=1;
  506. resindex:=1;
  507. while (i<=slen) do
  508. begin
  509. if (s[i]<=#127) then
  510. begin
  511. wc:=wchar_t(s[i]);
  512. mblen:= 1;
  513. end
  514. else
  515. {$ifndef beos}
  516. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  517. {$else not beos}
  518. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  519. {$endif beos}
  520. case mblen of
  521. size_t(-2):
  522. begin
  523. { partial invalid character, copy literally }
  524. while (i<=slen) do
  525. begin
  526. ConcatCharToAnsiStr(s[i],result,resindex);
  527. inc(i);
  528. end;
  529. end;
  530. size_t(-1), 0:
  531. begin
  532. { invalid or null character }
  533. ConcatCharToAnsiStr(s[i],result,resindex);
  534. inc(i);
  535. end;
  536. else
  537. begin
  538. { a valid sequence }
  539. { even if mblen = 1, the uppercase version may have a }
  540. { different length }
  541. { We can't do anything special if wchar_t is 16 bit... }
  542. {$ifndef beos}
  543. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  544. {$else not beos}
  545. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  546. {$endif not beos}
  547. inc(i,mblen);
  548. end;
  549. end;
  550. end;
  551. SetLength(result,resindex-1);
  552. end;
  553. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  554. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  555. var
  556. i, slen,
  557. destindex : SizeInt;
  558. len : longint;
  559. uch : UCS4Char;
  560. begin
  561. slen:=length(s);
  562. setlength(result,slen+1);
  563. i:=1;
  564. destindex:=0;
  565. while (i<=slen) do
  566. begin
  567. uch:=utf16toutf32(s,i,len);
  568. if (uch=UCS4Char(0)) then
  569. uch:=UCS4Char(32);
  570. result[destindex]:=uch;
  571. inc(destindex);
  572. inc(i,len);
  573. end;
  574. result[destindex]:=UCS4Char(0);
  575. { destindex <= slen }
  576. setlength(result,destindex+1);
  577. end;
  578. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  579. var
  580. hs1,hs2 : UCS4String;
  581. begin
  582. { wcscoll interprets null chars as end-of-string -> filter out }
  583. hs1:=WideStringToUCS4StringNoNulls(s1);
  584. hs2:=WideStringToUCS4StringNoNulls(s2);
  585. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  586. end;
  587. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  588. begin
  589. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  590. end;
  591. function CharLengthPChar(const Str: PChar): PtrInt;
  592. var
  593. nextlen: ptrint;
  594. s: pchar;
  595. {$ifndef beos}
  596. mbstate: mbstate_t;
  597. {$endif not beos}
  598. begin
  599. result:=0;
  600. s:=str;
  601. {$ifndef beos}
  602. fillchar(mbstate,sizeof(mbstate),0);
  603. {$endif not beos}
  604. repeat
  605. {$ifdef beos}
  606. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  607. {$else beos}
  608. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  609. {$endif beos}
  610. { skip invalid/incomplete sequences }
  611. if (nextlen<0) then
  612. nextlen:=1;
  613. inc(result,nextlen);
  614. inc(s,nextlen);
  615. until (nextlen=0);
  616. end;
  617. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  618. var
  619. nextlen: ptrint;
  620. {$ifndef beos}
  621. mbstate: mbstate_t;
  622. {$endif not beos}
  623. begin
  624. {$ifdef beos}
  625. result:=ptrint(mblen(str,maxlookahead));
  626. {$else beos}
  627. fillchar(mbstate,sizeof(mbstate),0);
  628. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  629. { mbrlen can also return -2 for "incomplete but potially valid character
  630. and data has been processed" }
  631. if result<0 then
  632. result:=-1;
  633. {$endif beos}
  634. end;
  635. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  636. var
  637. a,b: pchar;
  638. i: PtrInt;
  639. begin
  640. if not(canmodifys1) then
  641. getmem(a,len1+1)
  642. else
  643. a:=s1;
  644. for i:=0 to len1-1 do
  645. if s1[i]<>#0 then
  646. a[i]:=s1[i]
  647. else
  648. a[i]:=#32;
  649. a[len1]:=#0;
  650. if not(canmodifys2) then
  651. getmem(b,len2+1)
  652. else
  653. b:=s2;
  654. for i:=0 to len2-1 do
  655. if s2[i]<>#0 then
  656. b[i]:=s2[i]
  657. else
  658. b[i]:=#32;
  659. b[len2]:=#0;
  660. result:=strcoll(a,b);
  661. if not(canmodifys1) then
  662. freemem(a);
  663. if not(canmodifys2) then
  664. freemem(b);
  665. end;
  666. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  667. begin
  668. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  669. end;
  670. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  671. begin
  672. result:=strcoll(s1,s2);
  673. end;
  674. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  675. var
  676. a, b: AnsiString;
  677. begin
  678. a:=UpperAnsistring(s1);
  679. b:=UpperAnsistring(s2);
  680. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  681. end;
  682. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  683. begin
  684. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  685. end;
  686. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  687. var
  688. a, b: pchar;
  689. begin
  690. if (maxlen=0) then
  691. exit(0);
  692. if (s1[maxlen]<>#0) then
  693. begin
  694. getmem(a,maxlen+1);
  695. move(s1^,a^,maxlen);
  696. a[maxlen]:=#0;
  697. end
  698. else
  699. a:=s1;
  700. if (s2[maxlen]<>#0) then
  701. begin
  702. getmem(b,maxlen+1);
  703. move(s2^,b^,maxlen);
  704. b[maxlen]:=#0;
  705. end
  706. else
  707. b:=s2;
  708. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  709. if (a<>s1) then
  710. freemem(a);
  711. if (b<>s2) then
  712. freemem(b);
  713. end;
  714. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  715. var
  716. a, b: ansistring;
  717. begin
  718. if (maxlen=0) then
  719. exit(0);
  720. setlength(a,maxlen);
  721. move(s1^,a[1],maxlen);
  722. setlength(b,maxlen);
  723. move(s2^,b[1],maxlen);
  724. result:=AnsiCompareText(a,b);
  725. end;
  726. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  727. var
  728. newlen: sizeint;
  729. begin
  730. newlen:=length(s);
  731. if newlen>strlen(orgp) then
  732. fpc_rangeerror;
  733. p:=orgp;
  734. if (newlen>0) then
  735. move(s[1],p[0],newlen);
  736. p[newlen]:=#0;
  737. end;
  738. function AnsiStrLower(Str: PChar): PChar;
  739. var
  740. temp: ansistring;
  741. begin
  742. temp:=loweransistring(str);
  743. ansi2pchar(temp,str,result);
  744. end;
  745. function AnsiStrUpper(Str: PChar): PChar;
  746. var
  747. temp: ansistring;
  748. begin
  749. temp:=upperansistring(str);
  750. ansi2pchar(temp,str,result);
  751. end;
  752. Procedure SetCWideStringManager;
  753. Var
  754. CWideStringManager : TUnicodeStringManager;
  755. begin
  756. CWideStringManager:=widestringmanager;
  757. With CWideStringManager do
  758. begin
  759. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  760. Ansi2WideMoveProc:=@Ansi2WideMove;
  761. UpperWideStringProc:=@UpperWideString;
  762. LowerWideStringProc:=@LowerWideString;
  763. CompareWideStringProc:=@CompareWideString;
  764. CompareTextWideStringProc:=@CompareTextWideString;
  765. CharLengthPCharProc:=@CharLengthPChar;
  766. CodePointLengthProc:=@CodePointLength;
  767. UpperAnsiStringProc:=@UpperAnsiString;
  768. LowerAnsiStringProc:=@LowerAnsiString;
  769. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  770. CompareTextAnsiStringProc:=@AnsiCompareText;
  771. StrCompAnsiStringProc:=@StrCompAnsi;
  772. StrICompAnsiStringProc:=@AnsiStrIComp;
  773. StrLCompAnsiStringProc:=@AnsiStrLComp;
  774. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  775. StrLowerAnsiStringProc:=@AnsiStrLower;
  776. StrUpperAnsiStringProc:=@AnsiStrUpper;
  777. ThreadInitProc:=@InitThread;
  778. ThreadFiniProc:=@FiniThread;
  779. { Unicode }
  780. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  781. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  782. UpperUnicodeStringProc:=@UpperWideString;
  783. LowerUnicodeStringProc:=@LowerWideString;
  784. CompareUnicodeStringProc:=@CompareWideString;
  785. CompareTextUnicodeStringProc:=@CompareTextWideString;
  786. end;
  787. SetUnicodeStringManager(CWideStringManager);
  788. end;
  789. initialization
  790. SetCWideStringManager;
  791. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  792. { with the information from the environment variables according to POSIX }
  793. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  794. setlocale(LC_ALL,'');
  795. { set the DefaultSystemCodePage }
  796. DefaultSystemCodePage:=CodePageNameToCodePage(ansistring(nl_langinfo(CODESET)));
  797. { init conversion tables for main program }
  798. InitThread;
  799. finalization
  800. { fini conversion tables for main program }
  801. FiniThread;
  802. end.