cwstring.pp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$linklib c}
  19. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  20. {$if defined(haiku)}
  21. {$linklib textencoding}
  22. {$linklib locale}
  23. {$else}
  24. {$linklib iconv}
  25. {$endif}
  26. {$define useiconv}
  27. {$endif linux}
  28. Uses
  29. BaseUnix,
  30. ctypes,
  31. unix,
  32. unixtype,
  33. initc;
  34. Const
  35. {$ifndef useiconv}
  36. libiconvname='c'; // is in libc under Linux.
  37. {$else}
  38. {$ifdef haiku}
  39. libiconvname='textencoding'; // is in libtextencoding under Haiku
  40. {$else}
  41. libiconvname='iconv';
  42. {$endif}
  43. {$endif}
  44. { helper functions from libc }
  45. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  46. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  47. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  48. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  49. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  50. {$ifndef beos}
  51. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  52. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  53. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  54. {$else beos}
  55. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  56. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  57. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  58. {$endif beos}
  59. const
  60. {$ifdef linux}
  61. __LC_CTYPE = 0;
  62. LC_ALL = 6;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = _NL_CTYPE_CODESET_NAME;
  66. {$else linux}
  67. {$ifdef darwin}
  68. CODESET = 0;
  69. LC_ALL = 0;
  70. {$else darwin}
  71. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  72. __LC_CTYPE = 0;
  73. LC_ALL = 0;
  74. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  75. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  76. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  77. {$else freebsd}
  78. {$ifdef solaris}
  79. CODESET=49;
  80. LC_ALL = 6;
  81. {$else solaris}
  82. {$ifdef beos}
  83. {$warning check correct value for BeOS}
  84. CODESET=49;
  85. {$ifdef haiku}
  86. LC_ALL = 0; // Checked for Haiku
  87. {$else}
  88. LC_ALL = 6; // Checked for BeOS
  89. {$endif}
  90. ESysEILSEQ = EILSEQ;
  91. {$else}
  92. {$ifdef OpenBSD}
  93. CODESET = 51;
  94. LC_ALL = 0;
  95. {$else not OpenBSD}
  96. {$ifdef NetBSD}
  97. CODESET = 51;
  98. LC_ALL = 0;
  99. {$else not NetBSD}
  100. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  101. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  102. {$endif NetBSD}
  103. {$endif OpenBSD}
  104. {$endif beos}
  105. {$endif solaris}
  106. {$endif FreeBSD}
  107. {$endif darwin}
  108. {$endif linux}
  109. { unicode encoding name }
  110. {$ifdef FPC_LITTLE_ENDIAN}
  111. unicode_encoding2 = 'UTF-16LE';
  112. unicode_encoding4 = 'UCS-4LE';
  113. {$else FPC_LITTLE_ENDIAN}
  114. unicode_encoding2 = 'UTF-16BE';
  115. unicode_encoding4 = 'UCS-4BE';
  116. {$endif FPC_LITTLE_ENDIAN}
  117. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  118. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  119. { that's a libc macro mapped to internal functions/variables }
  120. { and thus not a stable external API on systems where libc }
  121. { breaks backwards compatibility every now and then }
  122. MB_CUR_MAX = 10;
  123. type
  124. piconv_t = ^iconv_t;
  125. iconv_t = pointer;
  126. nl_item = cint;
  127. {$ifdef haiku}
  128. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  129. {$else}
  130. {$ifndef beos}
  131. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  132. {$endif}
  133. {$endif}
  134. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32))}
  135. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  136. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  137. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  138. {$else}
  139. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  140. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  141. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  142. {$endif}
  143. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  144. threadvar
  145. iconv_ansi2wide,
  146. iconv_wide2ansi : iconv_t;
  147. {$if defined(beos) and not defined(haiku)}
  148. function nl_langinfo(__item:nl_item):pchar;
  149. begin
  150. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  151. // Now implement the minimum required to correctly initialize WideString support
  152. case __item of
  153. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  154. else
  155. begin
  156. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  157. Result := '';
  158. end;
  159. end;
  160. end;
  161. {$endif}
  162. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  163. var
  164. outlength,
  165. outoffset,
  166. srclen,
  167. outleft : size_t;
  168. srcpos : pwidechar;
  169. destpos: pchar;
  170. mynil : pchar;
  171. my0 : size_t;
  172. err: cint;
  173. begin
  174. {$ifndef VER2_2}
  175. if PtrInt(iconv_wide2ansi)=-1 then
  176. begin
  177. DefaultUnicode2AnsiMove(source,dest,len);
  178. exit;
  179. end;
  180. {$endif VER2_2}
  181. mynil:=nil;
  182. my0:=0;
  183. { rought estimation }
  184. setlength(dest,len*3);
  185. outlength:=len*3;
  186. srclen:=len*2;
  187. srcpos:=source;
  188. destpos:=pchar(dest);
  189. outleft:=outlength;
  190. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  191. begin
  192. err:=fpgetCerrno;
  193. case err of
  194. { last character is incomplete sequence }
  195. ESysEINVAL,
  196. { incomplete sequence in the middle }
  197. ESysEILSEQ:
  198. begin
  199. { skip and set to '?' }
  200. inc(srcpos);
  201. dec(srclen,2);
  202. destpos^:='?';
  203. inc(destpos);
  204. dec(outleft);
  205. { reset }
  206. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  207. if err=ESysEINVAL then
  208. break;
  209. end;
  210. ESysE2BIG:
  211. begin
  212. outoffset:=destpos-pchar(dest);
  213. { extend }
  214. setlength(dest,outlength+len*3);
  215. inc(outleft,len*3);
  216. inc(outlength,len*3);
  217. { string could have been moved }
  218. destpos:=pchar(dest)+outoffset;
  219. end;
  220. else
  221. runerror(231);
  222. end;
  223. end;
  224. // truncate string
  225. setlength(dest,length(dest)-outleft);
  226. end;
  227. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  228. var
  229. outlength,
  230. outoffset,
  231. outleft : size_t;
  232. srcpos,
  233. destpos: pchar;
  234. mynil : pchar;
  235. my0 : size_t;
  236. err: cint;
  237. begin
  238. {$ifndef VER2_2}
  239. if PtrInt(iconv_ansi2wide)=-1 then
  240. begin
  241. DefaultAnsi2UnicodeMove(source,dest,len);
  242. exit;
  243. end;
  244. {$endif VER2_2}
  245. mynil:=nil;
  246. my0:=0;
  247. // extra space
  248. outlength:=len+1;
  249. setlength(dest,outlength);
  250. srcpos:=source;
  251. destpos:=pchar(dest);
  252. outleft:=outlength*2;
  253. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  254. begin
  255. err:=fpgetCerrno;
  256. case err of
  257. ESysEINVAL,
  258. ESysEILSEQ:
  259. begin
  260. { skip and set to '?' }
  261. inc(srcpos);
  262. dec(len);
  263. pwidechar(destpos)^:='?';
  264. inc(destpos,2);
  265. dec(outleft,2);
  266. { reset }
  267. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  268. if err=ESysEINVAL then
  269. break;
  270. end;
  271. ESysE2BIG:
  272. begin
  273. outoffset:=destpos-pchar(dest);
  274. { extend }
  275. setlength(dest,outlength+len);
  276. inc(outleft,len*2);
  277. inc(outlength,len);
  278. { string could have been moved }
  279. destpos:=pchar(dest)+outoffset;
  280. end;
  281. else
  282. runerror(231);
  283. end;
  284. end;
  285. // truncate string
  286. setlength(dest,length(dest)-outleft div 2);
  287. end;
  288. function LowerWideString(const s : WideString) : WideString;
  289. var
  290. i : SizeInt;
  291. begin
  292. SetLength(result,length(s));
  293. for i:=0 to length(s)-1 do
  294. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  295. end;
  296. function UpperWideString(const s : WideString) : WideString;
  297. var
  298. i : SizeInt;
  299. begin
  300. SetLength(result,length(s));
  301. for i:=0 to length(s)-1 do
  302. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  303. end;
  304. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  305. begin
  306. if (len>length(s)) then
  307. if (length(s) < 10*256) then
  308. setlength(s,length(s)+10)
  309. else
  310. setlength(s,length(s)+length(s) shr 8);
  311. end;
  312. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  313. begin
  314. EnsureAnsiLen(s,index);
  315. pchar(@s[index])^:=c;
  316. inc(index);
  317. end;
  318. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  319. {$ifndef beos}
  320. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  321. {$else not beos}
  322. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  323. {$endif beos}
  324. var
  325. p : pchar;
  326. mblen : size_t;
  327. begin
  328. { we know that s is unique -> avoid uniquestring calls}
  329. p:=@s[index];
  330. if (nc<=127) then
  331. ConcatCharToAnsiStr(char(nc),s,index)
  332. else
  333. begin
  334. EnsureAnsiLen(s,index+MB_CUR_MAX);
  335. {$ifndef beos}
  336. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  337. {$else not beos}
  338. mblen:=wctomb(p,wchar_t(nc));
  339. {$endif not beos}
  340. if (mblen<>size_t(-1)) then
  341. inc(index,mblen)
  342. else
  343. begin
  344. { invalid wide char }
  345. p^:='?';
  346. inc(index);
  347. end;
  348. end;
  349. end;
  350. function LowerAnsiString(const s : AnsiString) : AnsiString;
  351. var
  352. i, slen,
  353. resindex : SizeInt;
  354. mblen : size_t;
  355. {$ifndef beos}
  356. ombstate,
  357. nmbstate : mbstate_t;
  358. {$endif beos}
  359. wc : wchar_t;
  360. begin
  361. {$ifndef beos}
  362. fillchar(ombstate,sizeof(ombstate),0);
  363. fillchar(nmbstate,sizeof(nmbstate),0);
  364. {$endif beos}
  365. slen:=length(s);
  366. SetLength(result,slen+10);
  367. i:=1;
  368. resindex:=1;
  369. while (i<=slen) do
  370. begin
  371. if (s[i]<=#127) then
  372. begin
  373. wc:=wchar_t(s[i]);
  374. mblen:= 1;
  375. end
  376. else
  377. {$ifndef beos}
  378. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  379. {$else not beos}
  380. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  381. {$endif not beos}
  382. case mblen of
  383. size_t(-2):
  384. begin
  385. { partial invalid character, copy literally }
  386. while (i<=slen) do
  387. begin
  388. ConcatCharToAnsiStr(s[i],result,resindex);
  389. inc(i);
  390. end;
  391. end;
  392. size_t(-1), 0:
  393. begin
  394. { invalid or null character }
  395. ConcatCharToAnsiStr(s[i],result,resindex);
  396. inc(i);
  397. end;
  398. else
  399. begin
  400. { a valid sequence }
  401. { even if mblen = 1, the lowercase version may have a }
  402. { different length }
  403. { We can't do anything special if wchar_t is 16 bit... }
  404. {$ifndef beos}
  405. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  406. {$else not beos}
  407. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  408. {$endif not beos}
  409. inc(i,mblen);
  410. end;
  411. end;
  412. end;
  413. SetLength(result,resindex-1);
  414. end;
  415. function UpperAnsiString(const s : AnsiString) : AnsiString;
  416. var
  417. i, slen,
  418. resindex : SizeInt;
  419. mblen : size_t;
  420. {$ifndef beos}
  421. ombstate,
  422. nmbstate : mbstate_t;
  423. {$endif beos}
  424. wc : wchar_t;
  425. begin
  426. {$ifndef beos}
  427. fillchar(ombstate,sizeof(ombstate),0);
  428. fillchar(nmbstate,sizeof(nmbstate),0);
  429. {$endif beos}
  430. slen:=length(s);
  431. SetLength(result,slen+10);
  432. i:=1;
  433. resindex:=1;
  434. while (i<=slen) do
  435. begin
  436. if (s[i]<=#127) then
  437. begin
  438. wc:=wchar_t(s[i]);
  439. mblen:= 1;
  440. end
  441. else
  442. {$ifndef beos}
  443. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  444. {$else not beos}
  445. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  446. {$endif beos}
  447. case mblen of
  448. size_t(-2):
  449. begin
  450. { partial invalid character, copy literally }
  451. while (i<=slen) do
  452. begin
  453. ConcatCharToAnsiStr(s[i],result,resindex);
  454. inc(i);
  455. end;
  456. end;
  457. size_t(-1), 0:
  458. begin
  459. { invalid or null character }
  460. ConcatCharToAnsiStr(s[i],result,resindex);
  461. inc(i);
  462. end;
  463. else
  464. begin
  465. { a valid sequence }
  466. { even if mblen = 1, the uppercase version may have a }
  467. { different length }
  468. { We can't do anything special if wchar_t is 16 bit... }
  469. {$ifndef beos}
  470. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  471. {$else not beos}
  472. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  473. {$endif not beos}
  474. inc(i,mblen);
  475. end;
  476. end;
  477. end;
  478. SetLength(result,resindex-1);
  479. end;
  480. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  481. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  482. var
  483. i, slen,
  484. destindex : SizeInt;
  485. len : longint;
  486. uch : UCS4Char;
  487. begin
  488. slen:=length(s);
  489. setlength(result,slen+1);
  490. i:=1;
  491. destindex:=0;
  492. while (i<=slen) do
  493. begin
  494. uch:=utf16toutf32(s,i,len);
  495. if (uch=UCS4Char(0)) then
  496. uch:=UCS4Char(32);
  497. result[destindex]:=uch;
  498. inc(destindex);
  499. inc(i,len);
  500. end;
  501. result[destindex]:=UCS4Char(0);
  502. { destindex <= slen }
  503. setlength(result,destindex+1);
  504. end;
  505. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  506. var
  507. hs1,hs2 : UCS4String;
  508. begin
  509. { wcscoll interprets null chars as end-of-string -> filter out }
  510. hs1:=WideStringToUCS4StringNoNulls(s1);
  511. hs2:=WideStringToUCS4StringNoNulls(s2);
  512. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  513. end;
  514. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  515. begin
  516. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  517. end;
  518. function CharLengthPChar(const Str: PChar): PtrInt;
  519. var
  520. nextlen: ptrint;
  521. s: pchar;
  522. {$ifndef beos}
  523. mbstate: mbstate_t;
  524. {$endif not beos}
  525. begin
  526. result:=0;
  527. s:=str;
  528. {$ifndef beos}
  529. fillchar(mbstate,sizeof(mbstate),0);
  530. {$endif not beos}
  531. repeat
  532. {$ifdef beos}
  533. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  534. {$else beos}
  535. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  536. {$endif beos}
  537. { skip invalid/incomplete sequences }
  538. if (nextlen<0) then
  539. nextlen:=1;
  540. inc(result,nextlen);
  541. inc(s,nextlen);
  542. until (nextlen=0);
  543. end;
  544. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  545. var
  546. nextlen: ptrint;
  547. {$ifndef beos}
  548. mbstate: mbstate_t;
  549. {$endif not beos}
  550. begin
  551. {$ifdef beos}
  552. result:=ptrint(mblen(str,maxlookahead));
  553. {$else beos}
  554. fillchar(mbstate,sizeof(mbstate),0);
  555. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  556. { mbrlen can also return -2 for "incomplete but potially valid character
  557. and data has been processed" }
  558. if result<0 then
  559. result:=-1;
  560. {$endif beos}
  561. end;
  562. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  563. var
  564. a,b: pchar;
  565. i: PtrInt;
  566. begin
  567. if not(canmodifys1) then
  568. getmem(a,len1+1)
  569. else
  570. a:=s1;
  571. for i:=0 to len1-1 do
  572. if s1[i]<>#0 then
  573. a[i]:=s1[i]
  574. else
  575. a[i]:=#32;
  576. a[len1]:=#0;
  577. if not(canmodifys2) then
  578. getmem(b,len2+1)
  579. else
  580. b:=s2;
  581. for i:=0 to len2-1 do
  582. if s2[i]<>#0 then
  583. b[i]:=s2[i]
  584. else
  585. b[i]:=#32;
  586. b[len2]:=#0;
  587. result:=strcoll(a,b);
  588. if not(canmodifys1) then
  589. freemem(a);
  590. if not(canmodifys2) then
  591. freemem(b);
  592. end;
  593. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  594. begin
  595. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  596. end;
  597. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  598. begin
  599. result:=strcoll(s1,s2);
  600. end;
  601. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  602. var
  603. a, b: AnsiString;
  604. begin
  605. a:=UpperAnsistring(s1);
  606. b:=UpperAnsistring(s2);
  607. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  608. end;
  609. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  610. begin
  611. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  612. end;
  613. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  614. var
  615. a, b: pchar;
  616. begin
  617. if (maxlen=0) then
  618. exit(0);
  619. if (s1[maxlen]<>#0) then
  620. begin
  621. getmem(a,maxlen+1);
  622. move(s1^,a^,maxlen);
  623. a[maxlen]:=#0;
  624. end
  625. else
  626. a:=s1;
  627. if (s2[maxlen]<>#0) then
  628. begin
  629. getmem(b,maxlen+1);
  630. move(s2^,b^,maxlen);
  631. b[maxlen]:=#0;
  632. end
  633. else
  634. b:=s2;
  635. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  636. if (a<>s1) then
  637. freemem(a);
  638. if (b<>s2) then
  639. freemem(b);
  640. end;
  641. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  642. var
  643. a, b: ansistring;
  644. begin
  645. if (maxlen=0) then
  646. exit(0);
  647. setlength(a,maxlen);
  648. move(s1^,a[1],maxlen);
  649. setlength(b,maxlen);
  650. move(s2^,b[1],maxlen);
  651. result:=AnsiCompareText(a,b);
  652. end;
  653. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  654. var
  655. newlen: sizeint;
  656. begin
  657. newlen:=length(s);
  658. if newlen>strlen(orgp) then
  659. fpc_rangeerror;
  660. p:=orgp;
  661. if (newlen>0) then
  662. move(s[1],p[0],newlen);
  663. p[newlen]:=#0;
  664. end;
  665. function AnsiStrLower(Str: PChar): PChar;
  666. var
  667. temp: ansistring;
  668. begin
  669. temp:=loweransistring(str);
  670. ansi2pchar(temp,str,result);
  671. end;
  672. function AnsiStrUpper(Str: PChar): PChar;
  673. var
  674. temp: ansistring;
  675. begin
  676. temp:=upperansistring(str);
  677. ansi2pchar(temp,str,result);
  678. end;
  679. procedure InitThread;
  680. begin
  681. {$if not(defined(darwin) and defined(arm))}
  682. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  683. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  684. {$else}
  685. { Unix locale settings are ignored on iPhoneOS }
  686. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  687. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  688. {$endif}
  689. end;
  690. procedure FiniThread;
  691. begin
  692. if (iconv_wide2ansi <> iconv_t(-1)) then
  693. iconv_close(iconv_wide2ansi);
  694. if (iconv_ansi2wide <> iconv_t(-1)) then
  695. iconv_close(iconv_ansi2wide);
  696. end;
  697. Procedure SetCWideStringManager;
  698. Var
  699. CWideStringManager : TUnicodeStringManager;
  700. begin
  701. CWideStringManager:=widestringmanager;
  702. With CWideStringManager do
  703. begin
  704. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  705. Ansi2WideMoveProc:=@Ansi2WideMove;
  706. UpperWideStringProc:=@UpperWideString;
  707. LowerWideStringProc:=@LowerWideString;
  708. CompareWideStringProc:=@CompareWideString;
  709. CompareTextWideStringProc:=@CompareTextWideString;
  710. CharLengthPCharProc:=@CharLengthPChar;
  711. CodePointLengthProc:=@CodePointLength;
  712. UpperAnsiStringProc:=@UpperAnsiString;
  713. LowerAnsiStringProc:=@LowerAnsiString;
  714. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  715. CompareTextAnsiStringProc:=@AnsiCompareText;
  716. StrCompAnsiStringProc:=@StrCompAnsi;
  717. StrICompAnsiStringProc:=@AnsiStrIComp;
  718. StrLCompAnsiStringProc:=@AnsiStrLComp;
  719. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  720. StrLowerAnsiStringProc:=@AnsiStrLower;
  721. StrUpperAnsiStringProc:=@AnsiStrUpper;
  722. ThreadInitProc:=@InitThread;
  723. ThreadFiniProc:=@FiniThread;
  724. { Unicode }
  725. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  726. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  727. UpperUnicodeStringProc:=@UpperWideString;
  728. LowerUnicodeStringProc:=@LowerWideString;
  729. CompareUnicodeStringProc:=@CompareWideString;
  730. CompareTextUnicodeStringProc:=@CompareTextWideString;
  731. end;
  732. SetUnicodeStringManager(CWideStringManager);
  733. end;
  734. initialization
  735. SetCWideStringManager;
  736. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  737. { with the information from the environment variables according to POSIX }
  738. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  739. setlocale(LC_ALL,'');
  740. { init conversion tables for main program }
  741. InitThread;
  742. finalization
  743. { fini conversion tables for main program }
  744. FiniThread;
  745. end.