cwstring.pp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$linklib c}
  19. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  20. {$if defined(haiku)}
  21. {$linklib textencoding}
  22. {$linklib locale}
  23. {$else}
  24. {$linklib iconv}
  25. {$endif}
  26. {$define useiconv}
  27. {$endif linux}
  28. Uses
  29. BaseUnix,
  30. ctypes,
  31. unix,
  32. unixtype,
  33. initc;
  34. Const
  35. {$ifndef useiconv}
  36. libiconvname='c'; // is in libc under Linux.
  37. {$else}
  38. {$ifdef haiku}
  39. libiconvname='textencoding'; // is in libtextencoding under Haiku
  40. {$else}
  41. libiconvname='iconv';
  42. {$endif}
  43. {$endif}
  44. { helper functions from libc }
  45. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  46. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  47. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  48. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  49. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  50. {$ifndef beos}
  51. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  52. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  53. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  54. {$else beos}
  55. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  56. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  57. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  58. {$endif beos}
  59. const
  60. {$ifdef linux}
  61. __LC_CTYPE = 0;
  62. LC_ALL = 6;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = _NL_CTYPE_CODESET_NAME;
  66. {$else linux}
  67. {$ifdef darwin}
  68. CODESET = 0;
  69. LC_ALL = 0;
  70. {$else darwin}
  71. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  72. __LC_CTYPE = 0;
  73. LC_ALL = 0;
  74. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  75. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  76. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  77. {$else freebsd}
  78. {$ifdef solaris}
  79. CODESET=49;
  80. LC_ALL = 6;
  81. {$else solaris}
  82. {$ifdef beos}
  83. {$warning check correct value for BeOS}
  84. CODESET=49;
  85. {$ifdef haiku}
  86. LC_ALL = 0; // Checked for Haiku
  87. {$else}
  88. LC_ALL = 6; // Checked for BeOS
  89. {$endif}
  90. ESysEILSEQ = EILSEQ;
  91. {$else}
  92. {$ifdef OpenBSD}
  93. CODESET = 51;
  94. LC_ALL = 0;
  95. {$else not OpenBSD}
  96. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  97. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  98. {$endif OpenBSD}
  99. {$endif beos}
  100. {$endif solaris}
  101. {$endif FreeBSD}
  102. {$endif darwin}
  103. {$endif linux}
  104. { unicode encoding name }
  105. {$ifdef FPC_LITTLE_ENDIAN}
  106. unicode_encoding2 = 'UTF-16LE';
  107. unicode_encoding4 = 'UCS-4LE';
  108. {$else FPC_LITTLE_ENDIAN}
  109. unicode_encoding2 = 'UTF-16BE';
  110. unicode_encoding4 = 'UCS-4BE';
  111. {$endif FPC_LITTLE_ENDIAN}
  112. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  113. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  114. { that's a libc macro mapped to internal functions/variables }
  115. { and thus not a stable external API on systems where libc }
  116. { breaks backwards compatibility every now and then }
  117. MB_CUR_MAX = 10;
  118. type
  119. piconv_t = ^iconv_t;
  120. iconv_t = pointer;
  121. nl_item = cint;
  122. {$ifdef haiku}
  123. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  124. {$else}
  125. {$ifndef beos}
  126. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  127. {$endif}
  128. {$endif}
  129. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32))}
  130. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  131. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  132. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  133. {$else}
  134. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  135. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  136. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  137. {$endif}
  138. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  139. threadvar
  140. iconv_ansi2wide,
  141. iconv_wide2ansi : iconv_t;
  142. {$if defined(beos) and not defined(haiku)}
  143. function nl_langinfo(__item:nl_item):pchar;
  144. begin
  145. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  146. // Now implement the minimum required to correctly initialize WideString support
  147. case __item of
  148. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  149. else
  150. begin
  151. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  152. Result := '';
  153. end;
  154. end;
  155. end;
  156. {$endif}
  157. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  158. var
  159. outlength,
  160. outoffset,
  161. srclen,
  162. outleft : size_t;
  163. srcpos : pwidechar;
  164. destpos: pchar;
  165. mynil : pchar;
  166. my0 : size_t;
  167. err: cint;
  168. begin
  169. {$ifndef VER2_2}
  170. if PtrInt(iconv_wide2ansi)=-1 then
  171. begin
  172. DefaultUnicode2AnsiMove(source,dest,len);
  173. exit;
  174. end;
  175. {$endif VER2_2}
  176. mynil:=nil;
  177. my0:=0;
  178. { rought estimation }
  179. setlength(dest,len*3);
  180. outlength:=len*3;
  181. srclen:=len*2;
  182. srcpos:=source;
  183. destpos:=pchar(dest);
  184. outleft:=outlength;
  185. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  186. begin
  187. err:=fpgetCerrno;
  188. case err of
  189. { last character is incomplete sequence }
  190. ESysEINVAL,
  191. { incomplete sequence in the middle }
  192. ESysEILSEQ:
  193. begin
  194. { skip and set to '?' }
  195. inc(srcpos);
  196. dec(srclen,2);
  197. destpos^:='?';
  198. inc(destpos);
  199. dec(outleft);
  200. { reset }
  201. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  202. if err=ESysEINVAL then
  203. break;
  204. end;
  205. ESysE2BIG:
  206. begin
  207. outoffset:=destpos-pchar(dest);
  208. { extend }
  209. setlength(dest,outlength+len*3);
  210. inc(outleft,len*3);
  211. inc(outlength,len*3);
  212. { string could have been moved }
  213. destpos:=pchar(dest)+outoffset;
  214. end;
  215. else
  216. runerror(231);
  217. end;
  218. end;
  219. // truncate string
  220. setlength(dest,length(dest)-outleft);
  221. end;
  222. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  223. var
  224. outlength,
  225. outoffset,
  226. outleft : size_t;
  227. srcpos,
  228. destpos: pchar;
  229. mynil : pchar;
  230. my0 : size_t;
  231. err: cint;
  232. begin
  233. {$ifndef VER2_2}
  234. if PtrInt(iconv_ansi2wide)=-1 then
  235. begin
  236. DefaultAnsi2UnicodeMove(source,dest,len);
  237. exit;
  238. end;
  239. {$endif VER2_2}
  240. mynil:=nil;
  241. my0:=0;
  242. // extra space
  243. outlength:=len+1;
  244. setlength(dest,outlength);
  245. srcpos:=source;
  246. destpos:=pchar(dest);
  247. outleft:=outlength*2;
  248. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  249. begin
  250. err:=fpgetCerrno;
  251. case err of
  252. ESysEINVAL,
  253. ESysEILSEQ:
  254. begin
  255. { skip and set to '?' }
  256. inc(srcpos);
  257. dec(len);
  258. pwidechar(destpos)^:='?';
  259. inc(destpos,2);
  260. dec(outleft,2);
  261. { reset }
  262. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  263. if err=ESysEINVAL then
  264. break;
  265. end;
  266. ESysE2BIG:
  267. begin
  268. outoffset:=destpos-pchar(dest);
  269. { extend }
  270. setlength(dest,outlength+len);
  271. inc(outleft,len*2);
  272. inc(outlength,len);
  273. { string could have been moved }
  274. destpos:=pchar(dest)+outoffset;
  275. end;
  276. else
  277. runerror(231);
  278. end;
  279. end;
  280. // truncate string
  281. setlength(dest,length(dest)-outleft div 2);
  282. end;
  283. function LowerWideString(const s : WideString) : WideString;
  284. var
  285. i : SizeInt;
  286. begin
  287. SetLength(result,length(s));
  288. for i:=0 to length(s)-1 do
  289. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  290. end;
  291. function UpperWideString(const s : WideString) : WideString;
  292. var
  293. i : SizeInt;
  294. begin
  295. SetLength(result,length(s));
  296. for i:=0 to length(s)-1 do
  297. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  298. end;
  299. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  300. begin
  301. if (len>length(s)) then
  302. if (length(s) < 10*256) then
  303. setlength(s,length(s)+10)
  304. else
  305. setlength(s,length(s)+length(s) shr 8);
  306. end;
  307. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  308. begin
  309. EnsureAnsiLen(s,index);
  310. pchar(@s[index])^:=c;
  311. inc(index);
  312. end;
  313. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  314. {$ifndef beos}
  315. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  316. {$else not beos}
  317. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  318. {$endif beos}
  319. var
  320. p : pchar;
  321. mblen : size_t;
  322. begin
  323. { we know that s is unique -> avoid uniquestring calls}
  324. p:=@s[index];
  325. if (nc<=127) then
  326. ConcatCharToAnsiStr(char(nc),s,index)
  327. else
  328. begin
  329. EnsureAnsiLen(s,index+MB_CUR_MAX);
  330. {$ifndef beos}
  331. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  332. {$else not beos}
  333. mblen:=wctomb(p,wchar_t(nc));
  334. {$endif not beos}
  335. if (mblen<>size_t(-1)) then
  336. inc(index,mblen)
  337. else
  338. begin
  339. { invalid wide char }
  340. p^:='?';
  341. inc(index);
  342. end;
  343. end;
  344. end;
  345. function LowerAnsiString(const s : AnsiString) : AnsiString;
  346. var
  347. i, slen,
  348. resindex : SizeInt;
  349. mblen : size_t;
  350. {$ifndef beos}
  351. ombstate,
  352. nmbstate : mbstate_t;
  353. {$endif beos}
  354. wc : wchar_t;
  355. begin
  356. {$ifndef beos}
  357. fillchar(ombstate,sizeof(ombstate),0);
  358. fillchar(nmbstate,sizeof(nmbstate),0);
  359. {$endif beos}
  360. slen:=length(s);
  361. SetLength(result,slen+10);
  362. i:=1;
  363. resindex:=1;
  364. while (i<=slen) do
  365. begin
  366. if (s[i]<=#127) then
  367. begin
  368. wc:=wchar_t(s[i]);
  369. mblen:= 1;
  370. end
  371. else
  372. {$ifndef beos}
  373. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  374. {$else not beos}
  375. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  376. {$endif not beos}
  377. case mblen of
  378. size_t(-2):
  379. begin
  380. { partial invalid character, copy literally }
  381. while (i<=slen) do
  382. begin
  383. ConcatCharToAnsiStr(s[i],result,resindex);
  384. inc(i);
  385. end;
  386. end;
  387. size_t(-1), 0:
  388. begin
  389. { invalid or null character }
  390. ConcatCharToAnsiStr(s[i],result,resindex);
  391. inc(i);
  392. end;
  393. else
  394. begin
  395. { a valid sequence }
  396. { even if mblen = 1, the lowercase version may have a }
  397. { different length }
  398. { We can't do anything special if wchar_t is 16 bit... }
  399. {$ifndef beos}
  400. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  401. {$else not beos}
  402. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  403. {$endif not beos}
  404. inc(i,mblen);
  405. end;
  406. end;
  407. end;
  408. SetLength(result,resindex-1);
  409. end;
  410. function UpperAnsiString(const s : AnsiString) : AnsiString;
  411. var
  412. i, slen,
  413. resindex : SizeInt;
  414. mblen : size_t;
  415. {$ifndef beos}
  416. ombstate,
  417. nmbstate : mbstate_t;
  418. {$endif beos}
  419. wc : wchar_t;
  420. begin
  421. {$ifndef beos}
  422. fillchar(ombstate,sizeof(ombstate),0);
  423. fillchar(nmbstate,sizeof(nmbstate),0);
  424. {$endif beos}
  425. slen:=length(s);
  426. SetLength(result,slen+10);
  427. i:=1;
  428. resindex:=1;
  429. while (i<=slen) do
  430. begin
  431. if (s[i]<=#127) then
  432. begin
  433. wc:=wchar_t(s[i]);
  434. mblen:= 1;
  435. end
  436. else
  437. {$ifndef beos}
  438. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  439. {$else not beos}
  440. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  441. {$endif beos}
  442. case mblen of
  443. size_t(-2):
  444. begin
  445. { partial invalid character, copy literally }
  446. while (i<=slen) do
  447. begin
  448. ConcatCharToAnsiStr(s[i],result,resindex);
  449. inc(i);
  450. end;
  451. end;
  452. size_t(-1), 0:
  453. begin
  454. { invalid or null character }
  455. ConcatCharToAnsiStr(s[i],result,resindex);
  456. inc(i);
  457. end;
  458. else
  459. begin
  460. { a valid sequence }
  461. { even if mblen = 1, the uppercase version may have a }
  462. { different length }
  463. { We can't do anything special if wchar_t is 16 bit... }
  464. {$ifndef beos}
  465. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  466. {$else not beos}
  467. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  468. {$endif not beos}
  469. inc(i,mblen);
  470. end;
  471. end;
  472. end;
  473. SetLength(result,resindex-1);
  474. end;
  475. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  476. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  477. var
  478. i, slen,
  479. destindex : SizeInt;
  480. len : longint;
  481. uch : UCS4Char;
  482. begin
  483. slen:=length(s);
  484. setlength(result,slen+1);
  485. i:=1;
  486. destindex:=0;
  487. while (i<=slen) do
  488. begin
  489. uch:=utf16toutf32(s,i,len);
  490. if (uch=UCS4Char(0)) then
  491. uch:=UCS4Char(32);
  492. result[destindex]:=uch;
  493. inc(destindex);
  494. inc(i,len);
  495. end;
  496. result[destindex]:=UCS4Char(0);
  497. { destindex <= slen }
  498. setlength(result,destindex+1);
  499. end;
  500. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  501. var
  502. hs1,hs2 : UCS4String;
  503. begin
  504. { wcscoll interprets null chars as end-of-string -> filter out }
  505. hs1:=WideStringToUCS4StringNoNulls(s1);
  506. hs2:=WideStringToUCS4StringNoNulls(s2);
  507. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  508. end;
  509. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  510. begin
  511. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  512. end;
  513. function CharLengthPChar(const Str: PChar): PtrInt;
  514. var
  515. nextlen: ptrint;
  516. s: pchar;
  517. {$ifndef beos}
  518. mbstate: mbstate_t;
  519. {$endif not beos}
  520. begin
  521. result:=0;
  522. s:=str;
  523. {$ifndef beos}
  524. fillchar(mbstate,sizeof(mbstate),0);
  525. {$endif not beos}
  526. repeat
  527. {$ifdef beos}
  528. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  529. {$else beos}
  530. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  531. {$endif beos}
  532. { skip invalid/incomplete sequences }
  533. if (nextlen<0) then
  534. nextlen:=1;
  535. inc(result,nextlen);
  536. inc(s,nextlen);
  537. until (nextlen=0);
  538. end;
  539. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  540. var
  541. nextlen: ptrint;
  542. {$ifndef beos}
  543. mbstate: mbstate_t;
  544. {$endif not beos}
  545. begin
  546. {$ifdef beos}
  547. result:=ptrint(mblen(str,maxlookahead));
  548. {$else beos}
  549. fillchar(mbstate,sizeof(mbstate),0);
  550. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  551. { mbrlen can also return -2 for "incomplete but potially valid character
  552. and data has been processed" }
  553. if result<0 then
  554. result:=-1;
  555. {$endif beos}
  556. end;
  557. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  558. var
  559. a,b: pchar;
  560. i: PtrInt;
  561. begin
  562. if not(canmodifys1) then
  563. getmem(a,len1+1)
  564. else
  565. a:=s1;
  566. for i:=0 to len1-1 do
  567. if s1[i]<>#0 then
  568. a[i]:=s1[i]
  569. else
  570. a[i]:=#32;
  571. a[len1]:=#0;
  572. if not(canmodifys2) then
  573. getmem(b,len2+1)
  574. else
  575. b:=s2;
  576. for i:=0 to len2-1 do
  577. if s2[i]<>#0 then
  578. b[i]:=s2[i]
  579. else
  580. b[i]:=#32;
  581. b[len2]:=#0;
  582. result:=strcoll(a,b);
  583. if not(canmodifys1) then
  584. freemem(a);
  585. if not(canmodifys2) then
  586. freemem(b);
  587. end;
  588. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  589. begin
  590. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  591. end;
  592. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  593. begin
  594. result:=strcoll(s1,s2);
  595. end;
  596. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  597. var
  598. a, b: AnsiString;
  599. begin
  600. a:=UpperAnsistring(s1);
  601. b:=UpperAnsistring(s2);
  602. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  603. end;
  604. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  605. begin
  606. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  607. end;
  608. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  609. var
  610. a, b: pchar;
  611. begin
  612. if (maxlen=0) then
  613. exit(0);
  614. if (s1[maxlen]<>#0) then
  615. begin
  616. getmem(a,maxlen+1);
  617. move(s1^,a^,maxlen);
  618. a[maxlen]:=#0;
  619. end
  620. else
  621. a:=s1;
  622. if (s2[maxlen]<>#0) then
  623. begin
  624. getmem(b,maxlen+1);
  625. move(s2^,b^,maxlen);
  626. b[maxlen]:=#0;
  627. end
  628. else
  629. b:=s2;
  630. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  631. if (a<>s1) then
  632. freemem(a);
  633. if (b<>s2) then
  634. freemem(b);
  635. end;
  636. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  637. var
  638. a, b: ansistring;
  639. begin
  640. if (maxlen=0) then
  641. exit(0);
  642. setlength(a,maxlen);
  643. move(s1^,a[1],maxlen);
  644. setlength(b,maxlen);
  645. move(s2^,b[1],maxlen);
  646. result:=AnsiCompareText(a,b);
  647. end;
  648. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  649. var
  650. newlen: sizeint;
  651. begin
  652. newlen:=length(s);
  653. if newlen>strlen(orgp) then
  654. fpc_rangeerror;
  655. p:=orgp;
  656. if (newlen>0) then
  657. move(s[1],p[0],newlen);
  658. p[newlen]:=#0;
  659. end;
  660. function AnsiStrLower(Str: PChar): PChar;
  661. var
  662. temp: ansistring;
  663. begin
  664. temp:=loweransistring(str);
  665. ansi2pchar(temp,str,result);
  666. end;
  667. function AnsiStrUpper(Str: PChar): PChar;
  668. var
  669. temp: ansistring;
  670. begin
  671. temp:=upperansistring(str);
  672. ansi2pchar(temp,str,result);
  673. end;
  674. procedure InitThread;
  675. begin
  676. {$if not(defined(darwin) and defined(arm))}
  677. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  678. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  679. {$else}
  680. { Unix locale settings are ignored on iPhoneOS }
  681. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  682. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  683. {$endif}
  684. end;
  685. procedure FiniThread;
  686. begin
  687. if (iconv_wide2ansi <> iconv_t(-1)) then
  688. iconv_close(iconv_wide2ansi);
  689. if (iconv_ansi2wide <> iconv_t(-1)) then
  690. iconv_close(iconv_ansi2wide);
  691. end;
  692. Procedure SetCWideStringManager;
  693. Var
  694. CWideStringManager : TUnicodeStringManager;
  695. begin
  696. CWideStringManager:=widestringmanager;
  697. With CWideStringManager do
  698. begin
  699. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  700. Ansi2WideMoveProc:=@Ansi2WideMove;
  701. UpperWideStringProc:=@UpperWideString;
  702. LowerWideStringProc:=@LowerWideString;
  703. CompareWideStringProc:=@CompareWideString;
  704. CompareTextWideStringProc:=@CompareTextWideString;
  705. CharLengthPCharProc:=@CharLengthPChar;
  706. CodePointLengthProc:=@CodePointLength;
  707. UpperAnsiStringProc:=@UpperAnsiString;
  708. LowerAnsiStringProc:=@LowerAnsiString;
  709. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  710. CompareTextAnsiStringProc:=@AnsiCompareText;
  711. StrCompAnsiStringProc:=@StrCompAnsi;
  712. StrICompAnsiStringProc:=@AnsiStrIComp;
  713. StrLCompAnsiStringProc:=@AnsiStrLComp;
  714. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  715. StrLowerAnsiStringProc:=@AnsiStrLower;
  716. StrUpperAnsiStringProc:=@AnsiStrUpper;
  717. ThreadInitProc:=@InitThread;
  718. ThreadFiniProc:=@FiniThread;
  719. { Unicode }
  720. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  721. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  722. UpperUnicodeStringProc:=@UpperWideString;
  723. LowerUnicodeStringProc:=@LowerWideString;
  724. CompareUnicodeStringProc:=@CompareWideString;
  725. CompareTextUnicodeStringProc:=@CompareTextWideString;
  726. end;
  727. SetUnicodeStringManager(CWideStringManager);
  728. end;
  729. initialization
  730. SetCWideStringManager;
  731. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  732. { with the information from the environment variables according to POSIX }
  733. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  734. setlocale(LC_ALL,'');
  735. { init conversion tables for main program }
  736. InitThread;
  737. finalization
  738. { fini conversion tables for main program }
  739. FiniThread;
  740. end.