cwstring.pp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$linklib c}
  19. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  20. {$if defined(haiku)}
  21. {$linklib textencoding}
  22. {$linklib locale}
  23. {$else}
  24. {$linklib iconv}
  25. {$endif}
  26. {$define useiconv}
  27. {$endif linux}
  28. Uses
  29. BaseUnix,
  30. ctypes,
  31. unix,
  32. unixtype,
  33. initc;
  34. Const
  35. {$ifndef useiconv}
  36. libiconvname='c'; // is in libc under Linux.
  37. {$else}
  38. {$ifdef haiku}
  39. libiconvname='textencoding'; // is in libtextencoding under Haiku
  40. {$else}
  41. libiconvname='iconv';
  42. {$endif}
  43. {$endif}
  44. { helper functions from libc }
  45. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  46. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  47. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  48. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  49. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  50. {$ifndef beos}
  51. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  52. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  53. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  54. {$else beos}
  55. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  56. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  57. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  58. {$endif beos}
  59. const
  60. {$ifdef linux}
  61. __LC_CTYPE = 0;
  62. LC_ALL = 6;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = _NL_CTYPE_CODESET_NAME;
  66. {$else linux}
  67. {$ifdef darwin}
  68. CODESET = 0;
  69. LC_ALL = 0;
  70. {$else darwin}
  71. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  72. __LC_CTYPE = 0;
  73. LC_ALL = 0;
  74. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  75. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  76. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  77. {$else freebsd}
  78. {$ifdef solaris}
  79. CODESET=49;
  80. LC_ALL = 6;
  81. {$else solaris}
  82. {$ifdef beos}
  83. {$warning check correct value for BeOS}
  84. CODESET=49;
  85. {$ifdef haiku}
  86. LC_ALL = 0; // Checked for Haiku
  87. {$else}
  88. LC_ALL = 6; // Checked for BeOS
  89. {$endif}
  90. ESysEILSEQ = EILSEQ;
  91. {$else}
  92. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  93. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  94. {$endif beos}
  95. {$endif solaris}
  96. {$endif FreeBSD}
  97. {$endif darwin}
  98. {$endif linux}
  99. { unicode encoding name }
  100. {$ifdef FPC_LITTLE_ENDIAN}
  101. unicode_encoding2 = 'UTF-16LE';
  102. unicode_encoding4 = 'UCS-4LE';
  103. {$else FPC_LITTLE_ENDIAN}
  104. unicode_encoding2 = 'UTF-16BE';
  105. unicode_encoding4 = 'UCS-4BE';
  106. {$endif FPC_LITTLE_ENDIAN}
  107. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  108. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  109. { that's a libc macro mapped to internal functions/variables }
  110. { and thus not a stable external API on systems where libc }
  111. { breaks backwards compatibility every now and then }
  112. MB_CUR_MAX = 10;
  113. type
  114. piconv_t = ^iconv_t;
  115. iconv_t = pointer;
  116. nl_item = cint;
  117. {$ifdef haiku}
  118. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  119. {$else}
  120. {$ifndef beos}
  121. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  122. {$endif}
  123. {$endif}
  124. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32))}
  125. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  126. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  127. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  128. {$else}
  129. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  130. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  131. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  132. {$endif}
  133. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  134. threadvar
  135. iconv_ansi2wide,
  136. iconv_wide2ansi : iconv_t;
  137. { since we cache the iconv_t converters, we have to do the same
  138. for the DefaultSystemCodePage variable since if it changes, we
  139. have to re-initialize the converters too. We can't do that via
  140. a callback in the widestring manager because DefaultSystemCodePage
  141. is not a threadvar and we can't automatically change this in all
  142. threads }
  143. current_DefaultSystemCodePage: TSystemCodePage;
  144. function win2iconv(cp: word): rawbytestring; forward;
  145. procedure InitThread;
  146. {$if not(defined(darwin) and defined(arm))}
  147. var
  148. iconvname: rawbytestring;
  149. {$endif}
  150. begin
  151. current_DefaultSystemCodePage:=DefaultSystemCodePage;
  152. {$if not(defined(darwin) and defined(arm))}
  153. iconvname:=win2iconv(DefaultSystemCodePage);
  154. iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
  155. iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
  156. {$else}
  157. { Unix locale settings are ignored on iPhoneOS }
  158. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  159. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  160. {$endif}
  161. end;
  162. procedure FiniThread;
  163. begin
  164. if (iconv_wide2ansi <> iconv_t(-1)) then
  165. iconv_close(iconv_wide2ansi);
  166. if (iconv_ansi2wide <> iconv_t(-1)) then
  167. iconv_close(iconv_ansi2wide);
  168. end;
  169. {$i winiconv}
  170. {$if defined(beos) and not defined(haiku)}
  171. function nl_langinfo(__item:nl_item):pchar;
  172. begin
  173. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  174. // Now implement the minimum required to correctly initialize WideString support
  175. case __item of
  176. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  177. else
  178. begin
  179. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  180. Result := '';
  181. end;
  182. end;
  183. end;
  184. {$endif}
  185. procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
  186. var
  187. outlength,
  188. outoffset,
  189. srclen,
  190. outleft : size_t;
  191. use_iconv: iconv_t;
  192. srcpos : pwidechar;
  193. destpos: pchar;
  194. mynil : pchar;
  195. my0 : size_t;
  196. err: cint;
  197. free_iconv: boolean;
  198. begin
  199. if (cp=DefaultSystemCodePage) then
  200. begin
  201. { update iconv converter in case the DefaultSystemCodePage has been
  202. changed }
  203. if current_DefaultSystemCodePage<>DefaultSystemCodePage then
  204. begin
  205. FiniThread;
  206. InitThread;
  207. end;
  208. use_iconv:=iconv_wide2ansi;
  209. free_iconv:=false;
  210. end
  211. else
  212. begin
  213. { TODO: add caching (then we also don't need separate code for
  214. the default system page and other ones)
  215. -- typecasting an ansistring function result to pchar is
  216. unsafe normally, but these are constant strings -> no
  217. problem }
  218. use_iconv:=iconv_open(pchar(win2iconv(cp)),unicode_encoding2);
  219. free_iconv:=true;
  220. end;
  221. { unsupported encoding -> default move }
  222. if use_iconv=iconv_t(-1) then
  223. begin
  224. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  225. exit;
  226. end;
  227. mynil:=nil;
  228. my0:=0;
  229. { rought estimation }
  230. setlength(dest,len*3);
  231. SetCodePage(dest,cp,false);
  232. outlength:=len*3;
  233. srclen:=len*2;
  234. srcpos:=source;
  235. destpos:=pchar(dest);
  236. outleft:=outlength;
  237. while iconv(use_iconv,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  238. begin
  239. err:=fpgetCerrno;
  240. case err of
  241. { last character is incomplete sequence }
  242. ESysEINVAL,
  243. { incomplete sequence in the middle }
  244. ESysEILSEQ:
  245. begin
  246. { skip and set to '?' }
  247. inc(srcpos);
  248. dec(srclen,2);
  249. destpos^:='?';
  250. inc(destpos);
  251. dec(outleft);
  252. { reset }
  253. iconv(use_iconv,@mynil,@my0,@mynil,@my0);
  254. if err=ESysEINVAL then
  255. break;
  256. end;
  257. ESysE2BIG:
  258. begin
  259. outoffset:=destpos-pchar(dest);
  260. { extend }
  261. setlength(dest,outlength+len*3);
  262. inc(outleft,len*3);
  263. inc(outlength,len*3);
  264. { string could have been moved }
  265. destpos:=pchar(dest)+outoffset;
  266. end;
  267. else
  268. runerror(231);
  269. end;
  270. end;
  271. // truncate string
  272. setlength(dest,length(dest)-outleft);
  273. if free_iconv then
  274. iconv_close(use_iconv);
  275. end;
  276. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  277. var
  278. outlength,
  279. outoffset,
  280. outleft : size_t;
  281. use_iconv: iconv_t;
  282. srcpos,
  283. destpos: pchar;
  284. mynil : pchar;
  285. my0 : size_t;
  286. err: cint;
  287. free_iconv: boolean;
  288. begin
  289. if (cp=DefaultSystemCodePage) then
  290. begin
  291. { update iconv converter in case the DefaultSystemCodePage has been
  292. changed }
  293. if current_DefaultSystemCodePage<>DefaultSystemCodePage then
  294. begin
  295. FiniThread;
  296. InitThread;
  297. end;
  298. use_iconv:=iconv_ansi2wide;
  299. free_iconv:=false;
  300. end
  301. else
  302. begin
  303. { TODO: add caching (then we also don't need separate code for
  304. the default system page and other ones)
  305. -- typecasting an ansistring function result to pchar is
  306. unsafe normally, but these are constant strings -> no
  307. problem }
  308. use_iconv:=iconv_open(unicode_encoding2,pchar(win2iconv(cp)));
  309. free_iconv:=true;
  310. end;
  311. { unsupported encoding -> default move }
  312. if use_iconv=iconv_t(-1) then
  313. begin
  314. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  315. exit;
  316. end;
  317. mynil:=nil;
  318. my0:=0;
  319. // extra space
  320. outlength:=len+1;
  321. setlength(dest,outlength);
  322. srcpos:=source;
  323. destpos:=pchar(dest);
  324. outleft:=outlength*2;
  325. while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  326. begin
  327. err:=fpgetCerrno;
  328. case err of
  329. ESysEINVAL,
  330. ESysEILSEQ:
  331. begin
  332. { skip and set to '?' }
  333. inc(srcpos);
  334. dec(len);
  335. pwidechar(destpos)^:='?';
  336. inc(destpos,2);
  337. dec(outleft,2);
  338. { reset }
  339. iconv(use_iconv,@mynil,@my0,@mynil,@my0);
  340. if err=ESysEINVAL then
  341. break;
  342. end;
  343. ESysE2BIG:
  344. begin
  345. outoffset:=destpos-pchar(dest);
  346. { extend }
  347. setlength(dest,outlength+len);
  348. inc(outleft,len*2);
  349. inc(outlength,len);
  350. { string could have been moved }
  351. destpos:=pchar(dest)+outoffset;
  352. end;
  353. else
  354. runerror(231);
  355. end;
  356. end;
  357. // truncate string
  358. setlength(dest,length(dest)-outleft div 2);
  359. if free_iconv then
  360. iconv_close(use_iconv);
  361. end;
  362. function LowerWideString(const s : WideString) : WideString;
  363. var
  364. i : SizeInt;
  365. begin
  366. SetLength(result,length(s));
  367. for i:=0 to length(s)-1 do
  368. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  369. end;
  370. function UpperWideString(const s : WideString) : WideString;
  371. var
  372. i : SizeInt;
  373. begin
  374. SetLength(result,length(s));
  375. for i:=0 to length(s)-1 do
  376. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  377. end;
  378. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  379. begin
  380. if (len>length(s)) then
  381. if (length(s) < 10*256) then
  382. setlength(s,length(s)+10)
  383. else
  384. setlength(s,length(s)+length(s) shr 8);
  385. end;
  386. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  387. begin
  388. EnsureAnsiLen(s,index);
  389. pchar(@s[index])^:=c;
  390. inc(index);
  391. end;
  392. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  393. {$ifndef beos}
  394. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  395. {$else not beos}
  396. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  397. {$endif beos}
  398. var
  399. p : pchar;
  400. mblen : size_t;
  401. begin
  402. { we know that s is unique -> avoid uniquestring calls}
  403. p:=@s[index];
  404. if (nc<=127) then
  405. ConcatCharToAnsiStr(char(nc),s,index)
  406. else
  407. begin
  408. EnsureAnsiLen(s,index+MB_CUR_MAX);
  409. {$ifndef beos}
  410. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  411. {$else not beos}
  412. mblen:=wctomb(p,wchar_t(nc));
  413. {$endif not beos}
  414. if (mblen<>size_t(-1)) then
  415. inc(index,mblen)
  416. else
  417. begin
  418. { invalid wide char }
  419. p^:='?';
  420. inc(index);
  421. end;
  422. end;
  423. end;
  424. function LowerAnsiString(const s : AnsiString) : AnsiString;
  425. var
  426. i, slen,
  427. resindex : SizeInt;
  428. mblen : size_t;
  429. {$ifndef beos}
  430. ombstate,
  431. nmbstate : mbstate_t;
  432. {$endif beos}
  433. wc : wchar_t;
  434. begin
  435. {$ifndef beos}
  436. fillchar(ombstate,sizeof(ombstate),0);
  437. fillchar(nmbstate,sizeof(nmbstate),0);
  438. {$endif beos}
  439. slen:=length(s);
  440. SetLength(result,slen+10);
  441. i:=1;
  442. resindex:=1;
  443. while (i<=slen) do
  444. begin
  445. if (s[i]<=#127) then
  446. begin
  447. wc:=wchar_t(s[i]);
  448. mblen:= 1;
  449. end
  450. else
  451. {$ifndef beos}
  452. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  453. {$else not beos}
  454. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  455. {$endif not beos}
  456. case mblen of
  457. size_t(-2):
  458. begin
  459. { partial invalid character, copy literally }
  460. while (i<=slen) do
  461. begin
  462. ConcatCharToAnsiStr(s[i],result,resindex);
  463. inc(i);
  464. end;
  465. end;
  466. size_t(-1), 0:
  467. begin
  468. { invalid or null character }
  469. ConcatCharToAnsiStr(s[i],result,resindex);
  470. inc(i);
  471. end;
  472. else
  473. begin
  474. { a valid sequence }
  475. { even if mblen = 1, the lowercase version may have a }
  476. { different length }
  477. { We can't do anything special if wchar_t is 16 bit... }
  478. {$ifndef beos}
  479. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  480. {$else not beos}
  481. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  482. {$endif not beos}
  483. inc(i,mblen);
  484. end;
  485. end;
  486. end;
  487. SetLength(result,resindex-1);
  488. end;
  489. function UpperAnsiString(const s : AnsiString) : AnsiString;
  490. var
  491. i, slen,
  492. resindex : SizeInt;
  493. mblen : size_t;
  494. {$ifndef beos}
  495. ombstate,
  496. nmbstate : mbstate_t;
  497. {$endif beos}
  498. wc : wchar_t;
  499. begin
  500. {$ifndef beos}
  501. fillchar(ombstate,sizeof(ombstate),0);
  502. fillchar(nmbstate,sizeof(nmbstate),0);
  503. {$endif beos}
  504. slen:=length(s);
  505. SetLength(result,slen+10);
  506. i:=1;
  507. resindex:=1;
  508. while (i<=slen) do
  509. begin
  510. if (s[i]<=#127) then
  511. begin
  512. wc:=wchar_t(s[i]);
  513. mblen:= 1;
  514. end
  515. else
  516. {$ifndef beos}
  517. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  518. {$else not beos}
  519. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  520. {$endif beos}
  521. case mblen of
  522. size_t(-2):
  523. begin
  524. { partial invalid character, copy literally }
  525. while (i<=slen) do
  526. begin
  527. ConcatCharToAnsiStr(s[i],result,resindex);
  528. inc(i);
  529. end;
  530. end;
  531. size_t(-1), 0:
  532. begin
  533. { invalid or null character }
  534. ConcatCharToAnsiStr(s[i],result,resindex);
  535. inc(i);
  536. end;
  537. else
  538. begin
  539. { a valid sequence }
  540. { even if mblen = 1, the uppercase version may have a }
  541. { different length }
  542. { We can't do anything special if wchar_t is 16 bit... }
  543. {$ifndef beos}
  544. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  545. {$else not beos}
  546. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  547. {$endif not beos}
  548. inc(i,mblen);
  549. end;
  550. end;
  551. end;
  552. SetLength(result,resindex-1);
  553. end;
  554. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  555. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  556. var
  557. i, slen,
  558. destindex : SizeInt;
  559. len : longint;
  560. uch : UCS4Char;
  561. begin
  562. slen:=length(s);
  563. setlength(result,slen+1);
  564. i:=1;
  565. destindex:=0;
  566. while (i<=slen) do
  567. begin
  568. uch:=utf16toutf32(s,i,len);
  569. if (uch=UCS4Char(0)) then
  570. uch:=UCS4Char(32);
  571. result[destindex]:=uch;
  572. inc(destindex);
  573. inc(i,len);
  574. end;
  575. result[destindex]:=UCS4Char(0);
  576. { destindex <= slen }
  577. setlength(result,destindex+1);
  578. end;
  579. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  580. var
  581. hs1,hs2 : UCS4String;
  582. begin
  583. { wcscoll interprets null chars as end-of-string -> filter out }
  584. hs1:=WideStringToUCS4StringNoNulls(s1);
  585. hs2:=WideStringToUCS4StringNoNulls(s2);
  586. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  587. end;
  588. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  589. begin
  590. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  591. end;
  592. function CharLengthPChar(const Str: PChar): PtrInt;
  593. var
  594. nextlen: ptrint;
  595. s: pchar;
  596. {$ifndef beos}
  597. mbstate: mbstate_t;
  598. {$endif not beos}
  599. begin
  600. result:=0;
  601. s:=str;
  602. {$ifndef beos}
  603. fillchar(mbstate,sizeof(mbstate),0);
  604. {$endif not beos}
  605. repeat
  606. {$ifdef beos}
  607. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  608. {$else beos}
  609. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  610. {$endif beos}
  611. { skip invalid/incomplete sequences }
  612. if (nextlen<0) then
  613. nextlen:=1;
  614. inc(result,nextlen);
  615. inc(s,nextlen);
  616. until (nextlen=0);
  617. end;
  618. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  619. var
  620. nextlen: ptrint;
  621. {$ifndef beos}
  622. mbstate: mbstate_t;
  623. {$endif not beos}
  624. begin
  625. {$ifdef beos}
  626. result:=ptrint(mblen(str,maxlookahead));
  627. {$else beos}
  628. fillchar(mbstate,sizeof(mbstate),0);
  629. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  630. { mbrlen can also return -2 for "incomplete but potially valid character
  631. and data has been processed" }
  632. if result<0 then
  633. result:=-1;
  634. {$endif beos}
  635. end;
  636. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  637. var
  638. a,b: pchar;
  639. i: PtrInt;
  640. begin
  641. if not(canmodifys1) then
  642. getmem(a,len1+1)
  643. else
  644. a:=s1;
  645. for i:=0 to len1-1 do
  646. if s1[i]<>#0 then
  647. a[i]:=s1[i]
  648. else
  649. a[i]:=#32;
  650. a[len1]:=#0;
  651. if not(canmodifys2) then
  652. getmem(b,len2+1)
  653. else
  654. b:=s2;
  655. for i:=0 to len2-1 do
  656. if s2[i]<>#0 then
  657. b[i]:=s2[i]
  658. else
  659. b[i]:=#32;
  660. b[len2]:=#0;
  661. result:=strcoll(a,b);
  662. if not(canmodifys1) then
  663. freemem(a);
  664. if not(canmodifys2) then
  665. freemem(b);
  666. end;
  667. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  668. begin
  669. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  670. end;
  671. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  672. begin
  673. result:=strcoll(s1,s2);
  674. end;
  675. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  676. var
  677. a, b: AnsiString;
  678. begin
  679. a:=UpperAnsistring(s1);
  680. b:=UpperAnsistring(s2);
  681. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  682. end;
  683. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  684. begin
  685. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  686. end;
  687. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  688. var
  689. a, b: pchar;
  690. begin
  691. if (maxlen=0) then
  692. exit(0);
  693. if (s1[maxlen]<>#0) then
  694. begin
  695. getmem(a,maxlen+1);
  696. move(s1^,a^,maxlen);
  697. a[maxlen]:=#0;
  698. end
  699. else
  700. a:=s1;
  701. if (s2[maxlen]<>#0) then
  702. begin
  703. getmem(b,maxlen+1);
  704. move(s2^,b^,maxlen);
  705. b[maxlen]:=#0;
  706. end
  707. else
  708. b:=s2;
  709. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  710. if (a<>s1) then
  711. freemem(a);
  712. if (b<>s2) then
  713. freemem(b);
  714. end;
  715. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  716. var
  717. a, b: ansistring;
  718. begin
  719. if (maxlen=0) then
  720. exit(0);
  721. setlength(a,maxlen);
  722. move(s1^,a[1],maxlen);
  723. setlength(b,maxlen);
  724. move(s2^,b[1],maxlen);
  725. result:=AnsiCompareText(a,b);
  726. end;
  727. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  728. var
  729. newlen: sizeint;
  730. begin
  731. newlen:=length(s);
  732. if newlen>strlen(orgp) then
  733. fpc_rangeerror;
  734. p:=orgp;
  735. if (newlen>0) then
  736. move(s[1],p[0],newlen);
  737. p[newlen]:=#0;
  738. end;
  739. function AnsiStrLower(Str: PChar): PChar;
  740. var
  741. temp: ansistring;
  742. begin
  743. temp:=loweransistring(str);
  744. ansi2pchar(temp,str,result);
  745. end;
  746. function AnsiStrUpper(Str: PChar): PChar;
  747. var
  748. temp: ansistring;
  749. begin
  750. temp:=upperansistring(str);
  751. ansi2pchar(temp,str,result);
  752. end;
  753. Procedure SetCWideStringManager;
  754. Var
  755. CWideStringManager : TUnicodeStringManager;
  756. begin
  757. CWideStringManager:=widestringmanager;
  758. With CWideStringManager do
  759. begin
  760. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  761. Ansi2WideMoveProc:=@Ansi2WideMove;
  762. UpperWideStringProc:=@UpperWideString;
  763. LowerWideStringProc:=@LowerWideString;
  764. CompareWideStringProc:=@CompareWideString;
  765. CompareTextWideStringProc:=@CompareTextWideString;
  766. CharLengthPCharProc:=@CharLengthPChar;
  767. CodePointLengthProc:=@CodePointLength;
  768. UpperAnsiStringProc:=@UpperAnsiString;
  769. LowerAnsiStringProc:=@LowerAnsiString;
  770. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  771. CompareTextAnsiStringProc:=@AnsiCompareText;
  772. StrCompAnsiStringProc:=@StrCompAnsi;
  773. StrICompAnsiStringProc:=@AnsiStrIComp;
  774. StrLCompAnsiStringProc:=@AnsiStrLComp;
  775. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  776. StrLowerAnsiStringProc:=@AnsiStrLower;
  777. StrUpperAnsiStringProc:=@AnsiStrUpper;
  778. ThreadInitProc:=@InitThread;
  779. ThreadFiniProc:=@FiniThread;
  780. { Unicode }
  781. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  782. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  783. UpperUnicodeStringProc:=@UpperWideString;
  784. LowerUnicodeStringProc:=@LowerWideString;
  785. CompareUnicodeStringProc:=@CompareWideString;
  786. CompareTextUnicodeStringProc:=@CompareTextWideString;
  787. end;
  788. SetUnicodeStringManager(CWideStringManager);
  789. end;
  790. initialization
  791. SetCWideStringManager;
  792. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  793. { with the information from the environment variables according to POSIX }
  794. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  795. setlocale(LC_ALL,'');
  796. { set the DefaultSystemCodePage }
  797. DefaultSystemCodePage:=iconv2win(ansistring(nl_langinfo(CODESET)));
  798. { init conversion tables for main program }
  799. InitThread;
  800. finalization
  801. { fini conversion tables for main program }
  802. FiniThread;
  803. end.