cwstring.pp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$ifndef symobi}
  19. {$linklib c}
  20. {$endif}
  21. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  22. {$if not defined(symobi)}
  23. {$if defined(haiku)}
  24. {$linklib textencoding}
  25. {$linklib locale}
  26. {$else}
  27. {$linklib iconv}
  28. {$endif}
  29. {$endif}
  30. {$define useiconv}
  31. {$endif linux}
  32. Uses
  33. BaseUnix,
  34. ctypes,
  35. unix,
  36. unixtype,
  37. initc;
  38. Const
  39. {$ifndef useiconv}
  40. libiconvname='c'; // is in libc under Linux.
  41. {$else}
  42. {$ifdef haiku}
  43. libiconvname='textencoding'; // is in libtextencoding under Haiku
  44. {$else}
  45. {$ifdef symobi}
  46. libiconvname='citrus';
  47. {$else}
  48. libiconvname='iconv';
  49. {$endif}
  50. {$endif}
  51. {$endif}
  52. { helper functions from libc }
  53. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  54. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  55. {$ifndef symobi}
  56. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  57. {$endif}
  58. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  59. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  60. {$ifdef symobi}
  61. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external libiconvname name 'mbrtowc';
  62. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external libiconvname name 'wcrtomb';
  63. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external libiconvname name 'mbrlen';
  64. {$else}
  65. {$ifndef beos}
  66. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  67. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  68. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  69. {$else beos}
  70. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  71. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  72. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  73. {$endif beos}
  74. {$endif}
  75. const
  76. {$ifdef linux}
  77. __LC_CTYPE = 0;
  78. LC_ALL = 6;
  79. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  80. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  81. CODESET = _NL_CTYPE_CODESET_NAME;
  82. {$else linux}
  83. {$ifdef darwin}
  84. CODESET = 0;
  85. LC_ALL = 0;
  86. {$else darwin}
  87. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  88. __LC_CTYPE = 0;
  89. LC_ALL = 0;
  90. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  91. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  92. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  93. {$else freebsd}
  94. {$ifdef solaris}
  95. CODESET=49;
  96. LC_ALL = 6;
  97. {$else solaris}
  98. {$ifdef beos}
  99. {$warning check correct value for BeOS}
  100. CODESET=49;
  101. {$ifdef haiku}
  102. LC_ALL = 0; // Checked for Haiku
  103. {$else}
  104. LC_ALL = 6; // Checked for BeOS
  105. {$endif}
  106. ESysEILSEQ = EILSEQ;
  107. {$else}
  108. {$ifdef symobi}
  109. CODESET = 51;
  110. LC_ALL = 0;
  111. {$else}
  112. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  113. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  114. {$endif symobi}
  115. {$endif beos}
  116. {$endif solaris}
  117. {$endif FreeBSD}
  118. {$endif darwin}
  119. {$endif linux}
  120. { unicode encoding name }
  121. {$ifdef FPC_LITTLE_ENDIAN}
  122. unicode_encoding2 = 'UTF-16LE';
  123. unicode_encoding4 = 'UCS-4LE';
  124. {$else FPC_LITTLE_ENDIAN}
  125. unicode_encoding2 = 'UTF-16BE';
  126. unicode_encoding4 = 'UCS-4BE';
  127. {$endif FPC_LITTLE_ENDIAN}
  128. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  129. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  130. { that's a libc macro mapped to internal functions/variables }
  131. { and thus not a stable external API on systems where libc }
  132. { breaks backwards compatibility every now and then }
  133. MB_CUR_MAX = 10;
  134. type
  135. piconv_t = ^iconv_t;
  136. iconv_t = pointer;
  137. nl_item = cint;
  138. {$ifdef haiku}
  139. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  140. {$else}
  141. {$ifndef beos}
  142. {$ifndef symobi}
  143. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  144. {$else}
  145. function nl_langinfo(__item:nl_item):pchar;cdecl;external clib name 'nl_langinfo';
  146. {$endif}
  147. {$endif}
  148. {$endif}
  149. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32)) or defined(symobi)}
  150. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  151. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  152. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  153. {$else}
  154. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  155. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  156. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  157. {$endif}
  158. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  159. threadvar
  160. iconv_ansi2wide,
  161. iconv_wide2ansi : iconv_t;
  162. {$if defined(beos) and not defined(haiku)}
  163. function nl_langinfo(__item:nl_item):pchar;
  164. begin
  165. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  166. // Now implement the minimum required to correctly initialize WideString support
  167. case __item of
  168. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  169. else
  170. begin
  171. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  172. Result := '';
  173. end;
  174. end;
  175. end;
  176. {$endif}
  177. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  178. var
  179. outlength,
  180. outoffset,
  181. srclen,
  182. outleft : size_t;
  183. srcpos : pwidechar;
  184. destpos: pchar;
  185. mynil : pchar;
  186. my0 : size_t;
  187. err: cint;
  188. begin
  189. {$ifndef VER2_2}
  190. if PtrInt(iconv_wide2ansi)=-1 then
  191. begin
  192. DefaultUnicode2AnsiMove(source,dest,len);
  193. exit;
  194. end;
  195. {$endif VER2_2}
  196. mynil:=nil;
  197. my0:=0;
  198. { rought estimation }
  199. setlength(dest,len*3);
  200. outlength:=len*3;
  201. srclen:=len*2;
  202. srcpos:=source;
  203. destpos:=pchar(dest);
  204. outleft:=outlength;
  205. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  206. begin
  207. err:=fpgetCerrno;
  208. case err of
  209. { last character is incomplete sequence }
  210. ESysEINVAL,
  211. { incomplete sequence in the middle }
  212. ESysEILSEQ:
  213. begin
  214. { skip and set to '?' }
  215. inc(srcpos);
  216. dec(srclen,2);
  217. destpos^:='?';
  218. inc(destpos);
  219. dec(outleft);
  220. { reset }
  221. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  222. if err=ESysEINVAL then
  223. break;
  224. end;
  225. ESysE2BIG:
  226. begin
  227. outoffset:=destpos-pchar(dest);
  228. { extend }
  229. setlength(dest,outlength+len*3);
  230. inc(outleft,len*3);
  231. inc(outlength,len*3);
  232. { string could have been moved }
  233. destpos:=pchar(dest)+outoffset;
  234. end;
  235. else
  236. runerror(231);
  237. end;
  238. end;
  239. // truncate string
  240. setlength(dest,length(dest)-outleft);
  241. end;
  242. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  243. var
  244. outlength,
  245. outoffset,
  246. outleft : size_t;
  247. srcpos,
  248. destpos: pchar;
  249. mynil : pchar;
  250. my0 : size_t;
  251. err: cint;
  252. begin
  253. {$ifndef VER2_2}
  254. if PtrInt(iconv_ansi2wide)=-1 then
  255. begin
  256. DefaultAnsi2UnicodeMove(source,dest,len);
  257. exit;
  258. end;
  259. {$endif VER2_2}
  260. mynil:=nil;
  261. my0:=0;
  262. // extra space
  263. outlength:=len+1;
  264. setlength(dest,outlength);
  265. srcpos:=source;
  266. destpos:=pchar(dest);
  267. outleft:=outlength*2;
  268. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  269. begin
  270. err:=fpgetCerrno;
  271. case err of
  272. ESysEINVAL,
  273. ESysEILSEQ:
  274. begin
  275. { skip and set to '?' }
  276. inc(srcpos);
  277. dec(len);
  278. pwidechar(destpos)^:='?';
  279. inc(destpos,2);
  280. dec(outleft,2);
  281. { reset }
  282. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  283. if err=ESysEINVAL then
  284. break;
  285. end;
  286. ESysE2BIG:
  287. begin
  288. outoffset:=destpos-pchar(dest);
  289. { extend }
  290. setlength(dest,outlength+len);
  291. inc(outleft,len*2);
  292. inc(outlength,len);
  293. { string could have been moved }
  294. destpos:=pchar(dest)+outoffset;
  295. end;
  296. else
  297. runerror(231);
  298. end;
  299. end;
  300. // truncate string
  301. setlength(dest,length(dest)-outleft div 2);
  302. end;
  303. function LowerWideString(const s : WideString) : WideString;
  304. var
  305. i : SizeInt;
  306. begin
  307. SetLength(result,length(s));
  308. for i:=0 to length(s)-1 do
  309. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  310. end;
  311. function UpperWideString(const s : WideString) : WideString;
  312. var
  313. i : SizeInt;
  314. begin
  315. SetLength(result,length(s));
  316. for i:=0 to length(s)-1 do
  317. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  318. end;
  319. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  320. begin
  321. if (len>length(s)) then
  322. if (length(s) < 10*256) then
  323. setlength(s,length(s)+10)
  324. else
  325. setlength(s,length(s)+length(s) shr 8);
  326. end;
  327. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  328. begin
  329. EnsureAnsiLen(s,index);
  330. pchar(@s[index])^:=c;
  331. inc(index);
  332. end;
  333. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  334. {$ifndef beos}
  335. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  336. {$else not beos}
  337. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  338. {$endif beos}
  339. var
  340. p : pchar;
  341. mblen : size_t;
  342. begin
  343. { we know that s is unique -> avoid uniquestring calls}
  344. p:=@s[index];
  345. if (nc<=127) then
  346. ConcatCharToAnsiStr(char(nc),s,index)
  347. else
  348. begin
  349. EnsureAnsiLen(s,index+MB_CUR_MAX);
  350. {$ifndef beos}
  351. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  352. {$else not beos}
  353. mblen:=wctomb(p,wchar_t(nc));
  354. {$endif not beos}
  355. if (mblen<>size_t(-1)) then
  356. inc(index,mblen)
  357. else
  358. begin
  359. { invalid wide char }
  360. p^:='?';
  361. inc(index);
  362. end;
  363. end;
  364. end;
  365. function LowerAnsiString(const s : AnsiString) : AnsiString;
  366. var
  367. i, slen,
  368. resindex : SizeInt;
  369. mblen : size_t;
  370. {$ifndef beos}
  371. ombstate,
  372. nmbstate : mbstate_t;
  373. {$endif beos}
  374. wc : wchar_t;
  375. begin
  376. {$ifndef beos}
  377. fillchar(ombstate,sizeof(ombstate),0);
  378. fillchar(nmbstate,sizeof(nmbstate),0);
  379. {$endif beos}
  380. slen:=length(s);
  381. SetLength(result,slen+10);
  382. i:=1;
  383. resindex:=1;
  384. while (i<=slen) do
  385. begin
  386. if (s[i]<=#127) then
  387. begin
  388. wc:=wchar_t(s[i]);
  389. mblen:= 1;
  390. end
  391. else
  392. {$ifndef beos}
  393. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  394. {$else not beos}
  395. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  396. {$endif not beos}
  397. case mblen of
  398. size_t(-2):
  399. begin
  400. { partial invalid character, copy literally }
  401. while (i<=slen) do
  402. begin
  403. ConcatCharToAnsiStr(s[i],result,resindex);
  404. inc(i);
  405. end;
  406. end;
  407. size_t(-1), 0:
  408. begin
  409. { invalid or null character }
  410. ConcatCharToAnsiStr(s[i],result,resindex);
  411. inc(i);
  412. end;
  413. else
  414. begin
  415. { a valid sequence }
  416. { even if mblen = 1, the lowercase version may have a }
  417. { different length }
  418. { We can't do anything special if wchar_t is 16 bit... }
  419. {$ifndef beos}
  420. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  421. {$else not beos}
  422. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  423. {$endif not beos}
  424. inc(i,mblen);
  425. end;
  426. end;
  427. end;
  428. SetLength(result,resindex-1);
  429. end;
  430. function UpperAnsiString(const s : AnsiString) : AnsiString;
  431. var
  432. i, slen,
  433. resindex : SizeInt;
  434. mblen : size_t;
  435. {$ifndef beos}
  436. ombstate,
  437. nmbstate : mbstate_t;
  438. {$endif beos}
  439. wc : wchar_t;
  440. begin
  441. {$ifndef beos}
  442. fillchar(ombstate,sizeof(ombstate),0);
  443. fillchar(nmbstate,sizeof(nmbstate),0);
  444. {$endif beos}
  445. slen:=length(s);
  446. SetLength(result,slen+10);
  447. i:=1;
  448. resindex:=1;
  449. while (i<=slen) do
  450. begin
  451. if (s[i]<=#127) then
  452. begin
  453. wc:=wchar_t(s[i]);
  454. mblen:= 1;
  455. end
  456. else
  457. {$ifndef beos}
  458. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  459. {$else not beos}
  460. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  461. {$endif beos}
  462. case mblen of
  463. size_t(-2):
  464. begin
  465. { partial invalid character, copy literally }
  466. while (i<=slen) do
  467. begin
  468. ConcatCharToAnsiStr(s[i],result,resindex);
  469. inc(i);
  470. end;
  471. end;
  472. size_t(-1), 0:
  473. begin
  474. { invalid or null character }
  475. ConcatCharToAnsiStr(s[i],result,resindex);
  476. inc(i);
  477. end;
  478. else
  479. begin
  480. { a valid sequence }
  481. { even if mblen = 1, the uppercase version may have a }
  482. { different length }
  483. { We can't do anything special if wchar_t is 16 bit... }
  484. {$ifndef beos}
  485. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  486. {$else not beos}
  487. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  488. {$endif not beos}
  489. inc(i,mblen);
  490. end;
  491. end;
  492. end;
  493. SetLength(result,resindex-1);
  494. end;
  495. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  496. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  497. var
  498. i, slen,
  499. destindex : SizeInt;
  500. len : longint;
  501. uch : UCS4Char;
  502. begin
  503. slen:=length(s);
  504. setlength(result,slen+1);
  505. i:=1;
  506. destindex:=0;
  507. while (i<=slen) do
  508. begin
  509. uch:=utf16toutf32(s,i,len);
  510. if (uch=UCS4Char(0)) then
  511. uch:=UCS4Char(32);
  512. result[destindex]:=uch;
  513. inc(destindex);
  514. inc(i,len);
  515. end;
  516. result[destindex]:=UCS4Char(0);
  517. { destindex <= slen }
  518. setlength(result,destindex+1);
  519. end;
  520. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  521. {$ifdef symobi}
  522. // Symobi doesn't currently define wcscoll(), so we do a simple comparison
  523. // taken from GenericAnsiCompareStr in objpas/sysutils/sysstr.inc
  524. Var
  525. I,L1,L2 : SizeInt;
  526. begin
  527. Result:=0;
  528. L1:=Length(S1);
  529. L2:=Length(S2);
  530. I:=1;
  531. While (Result=0) and ((I<=L1) and (I<=L2)) do
  532. begin
  533. Result:=Ord(S1[I])-Ord(S2[I]); //!! Must be replaced by ansi characters !!
  534. Inc(I);
  535. end;
  536. If Result=0 Then
  537. Result:=L1-L2;
  538. end;
  539. {$else}
  540. var
  541. hs1,hs2 : UCS4String;
  542. begin
  543. { wcscoll interprets null chars as end-of-string -> filter out }
  544. hs1:=WideStringToUCS4StringNoNulls(s1);
  545. hs2:=WideStringToUCS4StringNoNulls(s2);
  546. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  547. end;
  548. {$endif}
  549. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  550. begin
  551. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  552. end;
  553. function CharLengthPChar(const Str: PChar): PtrInt;
  554. var
  555. nextlen: ptrint;
  556. s: pchar;
  557. {$ifndef beos}
  558. mbstate: mbstate_t;
  559. {$endif not beos}
  560. begin
  561. result:=0;
  562. s:=str;
  563. {$ifndef beos}
  564. fillchar(mbstate,sizeof(mbstate),0);
  565. {$endif not beos}
  566. repeat
  567. {$ifdef beos}
  568. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  569. {$else beos}
  570. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  571. {$endif beos}
  572. { skip invalid/incomplete sequences }
  573. if (nextlen<0) then
  574. nextlen:=1;
  575. inc(result,nextlen);
  576. inc(s,nextlen);
  577. until (nextlen=0);
  578. end;
  579. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  580. var
  581. nextlen: ptrint;
  582. {$ifndef beos}
  583. mbstate: mbstate_t;
  584. {$endif not beos}
  585. begin
  586. {$ifdef beos}
  587. result:=ptrint(mblen(str,maxlookahead));
  588. {$else beos}
  589. fillchar(mbstate,sizeof(mbstate),0);
  590. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  591. { mbrlen can also return -2 for "incomplete but potially valid character
  592. and data has been processed" }
  593. if result<0 then
  594. result:=-1;
  595. {$endif beos}
  596. end;
  597. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  598. var
  599. a,b: pchar;
  600. i: PtrInt;
  601. begin
  602. if not(canmodifys1) then
  603. getmem(a,len1+1)
  604. else
  605. a:=s1;
  606. for i:=0 to len1-1 do
  607. if s1[i]<>#0 then
  608. a[i]:=s1[i]
  609. else
  610. a[i]:=#32;
  611. a[len1]:=#0;
  612. if not(canmodifys2) then
  613. getmem(b,len2+1)
  614. else
  615. b:=s2;
  616. for i:=0 to len2-1 do
  617. if s2[i]<>#0 then
  618. b[i]:=s2[i]
  619. else
  620. b[i]:=#32;
  621. b[len2]:=#0;
  622. result:=strcoll(a,b);
  623. if not(canmodifys1) then
  624. freemem(a);
  625. if not(canmodifys2) then
  626. freemem(b);
  627. end;
  628. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  629. begin
  630. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  631. end;
  632. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  633. begin
  634. result:=strcoll(s1,s2);
  635. end;
  636. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  637. var
  638. a, b: AnsiString;
  639. begin
  640. a:=UpperAnsistring(s1);
  641. b:=UpperAnsistring(s2);
  642. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  643. end;
  644. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  645. begin
  646. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  647. end;
  648. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  649. var
  650. a, b: pchar;
  651. begin
  652. if (maxlen=0) then
  653. exit(0);
  654. if (s1[maxlen]<>#0) then
  655. begin
  656. getmem(a,maxlen+1);
  657. move(s1^,a^,maxlen);
  658. a[maxlen]:=#0;
  659. end
  660. else
  661. a:=s1;
  662. if (s2[maxlen]<>#0) then
  663. begin
  664. getmem(b,maxlen+1);
  665. move(s2^,b^,maxlen);
  666. b[maxlen]:=#0;
  667. end
  668. else
  669. b:=s2;
  670. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  671. if (a<>s1) then
  672. freemem(a);
  673. if (b<>s2) then
  674. freemem(b);
  675. end;
  676. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  677. var
  678. a, b: ansistring;
  679. begin
  680. if (maxlen=0) then
  681. exit(0);
  682. setlength(a,maxlen);
  683. move(s1^,a[1],maxlen);
  684. setlength(b,maxlen);
  685. move(s2^,b[1],maxlen);
  686. result:=AnsiCompareText(a,b);
  687. end;
  688. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  689. var
  690. newlen: sizeint;
  691. begin
  692. newlen:=length(s);
  693. if newlen>strlen(orgp) then
  694. fpc_rangeerror;
  695. p:=orgp;
  696. if (newlen>0) then
  697. move(s[1],p[0],newlen);
  698. p[newlen]:=#0;
  699. end;
  700. function AnsiStrLower(Str: PChar): PChar;
  701. var
  702. temp: ansistring;
  703. begin
  704. temp:=loweransistring(str);
  705. ansi2pchar(temp,str,result);
  706. end;
  707. function AnsiStrUpper(Str: PChar): PChar;
  708. var
  709. temp: ansistring;
  710. begin
  711. temp:=upperansistring(str);
  712. ansi2pchar(temp,str,result);
  713. end;
  714. procedure InitThread;
  715. begin
  716. {$if not(defined(darwin) and defined(arm))}
  717. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  718. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  719. {$else}
  720. { Unix locale settings are ignored on iPhoneOS }
  721. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  722. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  723. {$endif}
  724. end;
  725. procedure FiniThread;
  726. begin
  727. if (iconv_wide2ansi <> iconv_t(-1)) then
  728. iconv_close(iconv_wide2ansi);
  729. if (iconv_ansi2wide <> iconv_t(-1)) then
  730. iconv_close(iconv_ansi2wide);
  731. end;
  732. Procedure SetCWideStringManager;
  733. Var
  734. CWideStringManager : TUnicodeStringManager;
  735. begin
  736. CWideStringManager:=widestringmanager;
  737. With CWideStringManager do
  738. begin
  739. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  740. Ansi2WideMoveProc:=@Ansi2WideMove;
  741. UpperWideStringProc:=@UpperWideString;
  742. LowerWideStringProc:=@LowerWideString;
  743. CompareWideStringProc:=@CompareWideString;
  744. CompareTextWideStringProc:=@CompareTextWideString;
  745. CharLengthPCharProc:=@CharLengthPChar;
  746. CodePointLengthProc:=@CodePointLength;
  747. UpperAnsiStringProc:=@UpperAnsiString;
  748. LowerAnsiStringProc:=@LowerAnsiString;
  749. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  750. CompareTextAnsiStringProc:=@AnsiCompareText;
  751. StrCompAnsiStringProc:=@StrCompAnsi;
  752. StrICompAnsiStringProc:=@AnsiStrIComp;
  753. StrLCompAnsiStringProc:=@AnsiStrLComp;
  754. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  755. StrLowerAnsiStringProc:=@AnsiStrLower;
  756. StrUpperAnsiStringProc:=@AnsiStrUpper;
  757. ThreadInitProc:=@InitThread;
  758. ThreadFiniProc:=@FiniThread;
  759. { Unicode }
  760. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  761. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  762. UpperUnicodeStringProc:=@UpperWideString;
  763. LowerUnicodeStringProc:=@LowerWideString;
  764. CompareUnicodeStringProc:=@CompareWideString;
  765. CompareTextUnicodeStringProc:=@CompareTextWideString;
  766. end;
  767. SetUnicodeStringManager(CWideStringManager);
  768. end;
  769. initialization
  770. SetCWideStringManager;
  771. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  772. { with the information from the environment variables according to POSIX }
  773. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  774. setlocale(LC_ALL,'');
  775. { init conversion tables for main program }
  776. InitThread;
  777. finalization
  778. { fini conversion tables for main program }
  779. FiniThread;
  780. end.