cwstring.pp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$linklib c}
  19. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  20. {$if defined(haiku)}
  21. {$linklib textencoding}
  22. {$linklib locale}
  23. {$else}
  24. {$linklib iconv}
  25. {$endif}
  26. {$define useiconv}
  27. {$endif linux}
  28. Uses
  29. BaseUnix,
  30. ctypes,
  31. unix,
  32. unixtype,
  33. initc;
  34. Const
  35. {$ifndef useiconv}
  36. libiconvname='c'; // is in libc under Linux.
  37. {$else}
  38. {$ifdef haiku}
  39. libiconvname='textencoding'; // is in libtextencoding under Haiku
  40. {$else}
  41. libiconvname='iconv';
  42. {$endif}
  43. {$endif}
  44. { helper functions from libc }
  45. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  46. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  47. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  48. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  49. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  50. {$ifndef beos}
  51. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  52. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  53. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  54. {$else beos}
  55. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  56. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  57. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  58. {$endif beos}
  59. const
  60. {$ifdef linux}
  61. __LC_CTYPE = 0;
  62. LC_ALL = 6;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = _NL_CTYPE_CODESET_NAME;
  66. {$else linux}
  67. {$ifdef darwin}
  68. CODESET = 0;
  69. LC_ALL = 0;
  70. {$else darwin}
  71. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  72. __LC_CTYPE = 0;
  73. LC_ALL = 0;
  74. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  75. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  76. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  77. {$else freebsd}
  78. {$ifdef solaris}
  79. CODESET=49;
  80. LC_ALL = 6;
  81. {$else solaris}
  82. {$ifdef beos}
  83. {$warning check correct value for BeOS}
  84. CODESET=49;
  85. {$ifdef haiku}
  86. LC_ALL = 0; // Checked for Haiku
  87. {$else}
  88. LC_ALL = 6; // Checked for BeOS
  89. {$endif}
  90. ESysEILSEQ = EILSEQ;
  91. {$else}
  92. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  93. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  94. {$endif beos}
  95. {$endif solaris}
  96. {$endif FreeBSD}
  97. {$endif darwin}
  98. {$endif linux}
  99. { unicode encoding name }
  100. {$ifdef FPC_LITTLE_ENDIAN}
  101. unicode_encoding2 = 'UTF-16LE';
  102. unicode_encoding4 = 'UCS-4LE';
  103. {$else FPC_LITTLE_ENDIAN}
  104. unicode_encoding2 = 'UTF-16BE';
  105. unicode_encoding4 = 'UCS-4BE';
  106. {$endif FPC_LITTLE_ENDIAN}
  107. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  108. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  109. { that's a libc macro mapped to internal functions/variables }
  110. { and thus not a stable external API on systems where libc }
  111. { breaks backwards compatibility every now and then }
  112. MB_CUR_MAX = 10;
  113. type
  114. piconv_t = ^iconv_t;
  115. iconv_t = pointer;
  116. nl_item = cint;
  117. {$ifdef haiku}
  118. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  119. {$else}
  120. {$ifndef beos}
  121. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  122. {$endif}
  123. {$endif}
  124. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32))}
  125. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  126. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  127. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  128. {$else}
  129. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  130. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  131. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  132. {$endif}
  133. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  134. threadvar
  135. iconv_ansi2wide,
  136. iconv_wide2ansi : iconv_t;
  137. {$if defined(beos) and not defined(haiku)}
  138. function nl_langinfo(__item:nl_item):pchar;
  139. begin
  140. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  141. // Now implement the minimum required to correctly initialize WideString support
  142. case __item of
  143. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  144. else
  145. begin
  146. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  147. Result := '';
  148. end;
  149. end;
  150. end;
  151. {$endif}
  152. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  153. var
  154. outlength,
  155. outoffset,
  156. srclen,
  157. outleft : size_t;
  158. srcpos : pwidechar;
  159. destpos: pchar;
  160. mynil : pchar;
  161. my0 : size_t;
  162. err: cint;
  163. begin
  164. {$ifndef VER2_2}
  165. if PtrInt(iconv_wide2ansi)=-1 then
  166. begin
  167. DefaultUnicode2AnsiMove(source,dest,len);
  168. exit;
  169. end;
  170. {$endif VER2_2}
  171. mynil:=nil;
  172. my0:=0;
  173. { rought estimation }
  174. setlength(dest,len*3);
  175. outlength:=len*3;
  176. srclen:=len*2;
  177. srcpos:=source;
  178. destpos:=pchar(dest);
  179. outleft:=outlength;
  180. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  181. begin
  182. err:=fpgetCerrno;
  183. case err of
  184. { last character is incomplete sequence }
  185. ESysEINVAL,
  186. { incomplete sequence in the middle }
  187. ESysEILSEQ:
  188. begin
  189. { skip and set to '?' }
  190. inc(srcpos);
  191. dec(srclen,2);
  192. destpos^:='?';
  193. inc(destpos);
  194. dec(outleft);
  195. { reset }
  196. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  197. if err=ESysEINVAL then
  198. break;
  199. end;
  200. ESysE2BIG:
  201. begin
  202. outoffset:=destpos-pchar(dest);
  203. { extend }
  204. setlength(dest,outlength+len*3);
  205. inc(outleft,len*3);
  206. inc(outlength,len*3);
  207. { string could have been moved }
  208. destpos:=pchar(dest)+outoffset;
  209. end;
  210. else
  211. runerror(231);
  212. end;
  213. end;
  214. // truncate string
  215. setlength(dest,length(dest)-outleft);
  216. end;
  217. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  218. var
  219. outlength,
  220. outoffset,
  221. outleft : size_t;
  222. srcpos,
  223. destpos: pchar;
  224. mynil : pchar;
  225. my0 : size_t;
  226. err: cint;
  227. begin
  228. {$ifndef VER2_2}
  229. if PtrInt(iconv_ansi2wide)=-1 then
  230. begin
  231. DefaultAnsi2UnicodeMove(source,dest,len);
  232. exit;
  233. end;
  234. {$endif VER2_2}
  235. mynil:=nil;
  236. my0:=0;
  237. // extra space
  238. outlength:=len+1;
  239. setlength(dest,outlength);
  240. srcpos:=source;
  241. destpos:=pchar(dest);
  242. outleft:=outlength*2;
  243. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  244. begin
  245. err:=fpgetCerrno;
  246. case err of
  247. ESysEINVAL,
  248. ESysEILSEQ:
  249. begin
  250. { skip and set to '?' }
  251. inc(srcpos);
  252. dec(len);
  253. pwidechar(destpos)^:='?';
  254. inc(destpos,2);
  255. dec(outleft,2);
  256. { reset }
  257. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  258. if err=ESysEINVAL then
  259. break;
  260. end;
  261. ESysE2BIG:
  262. begin
  263. outoffset:=destpos-pchar(dest);
  264. { extend }
  265. setlength(dest,outlength+len);
  266. inc(outleft,len*2);
  267. inc(outlength,len);
  268. { string could have been moved }
  269. destpos:=pchar(dest)+outoffset;
  270. end;
  271. else
  272. runerror(231);
  273. end;
  274. end;
  275. // truncate string
  276. setlength(dest,length(dest)-outleft div 2);
  277. end;
  278. function LowerWideString(const s : WideString) : WideString;
  279. var
  280. i : SizeInt;
  281. begin
  282. SetLength(result,length(s));
  283. for i:=0 to length(s)-1 do
  284. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  285. end;
  286. function UpperWideString(const s : WideString) : WideString;
  287. var
  288. i : SizeInt;
  289. begin
  290. SetLength(result,length(s));
  291. for i:=0 to length(s)-1 do
  292. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  293. end;
  294. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  295. begin
  296. if (len>length(s)) then
  297. if (length(s) < 10*256) then
  298. setlength(s,length(s)+10)
  299. else
  300. setlength(s,length(s)+length(s) shr 8);
  301. end;
  302. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  303. begin
  304. EnsureAnsiLen(s,index);
  305. pchar(@s[index])^:=c;
  306. inc(index);
  307. end;
  308. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  309. {$ifndef beos}
  310. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  311. {$else not beos}
  312. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  313. {$endif beos}
  314. var
  315. p : pchar;
  316. mblen : size_t;
  317. begin
  318. { we know that s is unique -> avoid uniquestring calls}
  319. p:=@s[index];
  320. if (nc<=127) then
  321. ConcatCharToAnsiStr(char(nc),s,index)
  322. else
  323. begin
  324. EnsureAnsiLen(s,index+MB_CUR_MAX);
  325. {$ifndef beos}
  326. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  327. {$else not beos}
  328. mblen:=wctomb(p,wchar_t(nc));
  329. {$endif not beos}
  330. if (mblen<>size_t(-1)) then
  331. inc(index,mblen)
  332. else
  333. begin
  334. { invalid wide char }
  335. p^:='?';
  336. inc(index);
  337. end;
  338. end;
  339. end;
  340. function LowerAnsiString(const s : AnsiString) : AnsiString;
  341. var
  342. i, slen,
  343. resindex : SizeInt;
  344. mblen : size_t;
  345. {$ifndef beos}
  346. ombstate,
  347. nmbstate : mbstate_t;
  348. {$endif beos}
  349. wc : wchar_t;
  350. begin
  351. {$ifndef beos}
  352. fillchar(ombstate,sizeof(ombstate),0);
  353. fillchar(nmbstate,sizeof(nmbstate),0);
  354. {$endif beos}
  355. slen:=length(s);
  356. SetLength(result,slen+10);
  357. i:=1;
  358. resindex:=1;
  359. while (i<=slen) do
  360. begin
  361. if (s[i]<=#127) then
  362. begin
  363. wc:=wchar_t(s[i]);
  364. mblen:= 1;
  365. end
  366. else
  367. {$ifndef beos}
  368. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  369. {$else not beos}
  370. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  371. {$endif not beos}
  372. case mblen of
  373. size_t(-2):
  374. begin
  375. { partial invalid character, copy literally }
  376. while (i<=slen) do
  377. begin
  378. ConcatCharToAnsiStr(s[i],result,resindex);
  379. inc(i);
  380. end;
  381. end;
  382. size_t(-1), 0:
  383. begin
  384. { invalid or null character }
  385. ConcatCharToAnsiStr(s[i],result,resindex);
  386. inc(i);
  387. end;
  388. else
  389. begin
  390. { a valid sequence }
  391. { even if mblen = 1, the lowercase version may have a }
  392. { different length }
  393. { We can't do anything special if wchar_t is 16 bit... }
  394. {$ifndef beos}
  395. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  396. {$else not beos}
  397. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  398. {$endif not beos}
  399. inc(i,mblen);
  400. end;
  401. end;
  402. end;
  403. SetLength(result,resindex-1);
  404. end;
  405. function UpperAnsiString(const s : AnsiString) : AnsiString;
  406. var
  407. i, slen,
  408. resindex : SizeInt;
  409. mblen : size_t;
  410. {$ifndef beos}
  411. ombstate,
  412. nmbstate : mbstate_t;
  413. {$endif beos}
  414. wc : wchar_t;
  415. begin
  416. {$ifndef beos}
  417. fillchar(ombstate,sizeof(ombstate),0);
  418. fillchar(nmbstate,sizeof(nmbstate),0);
  419. {$endif beos}
  420. slen:=length(s);
  421. SetLength(result,slen+10);
  422. i:=1;
  423. resindex:=1;
  424. while (i<=slen) do
  425. begin
  426. if (s[i]<=#127) then
  427. begin
  428. wc:=wchar_t(s[i]);
  429. mblen:= 1;
  430. end
  431. else
  432. {$ifndef beos}
  433. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  434. {$else not beos}
  435. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  436. {$endif beos}
  437. case mblen of
  438. size_t(-2):
  439. begin
  440. { partial invalid character, copy literally }
  441. while (i<=slen) do
  442. begin
  443. ConcatCharToAnsiStr(s[i],result,resindex);
  444. inc(i);
  445. end;
  446. end;
  447. size_t(-1), 0:
  448. begin
  449. { invalid or null character }
  450. ConcatCharToAnsiStr(s[i],result,resindex);
  451. inc(i);
  452. end;
  453. else
  454. begin
  455. { a valid sequence }
  456. { even if mblen = 1, the uppercase version may have a }
  457. { different length }
  458. { We can't do anything special if wchar_t is 16 bit... }
  459. {$ifndef beos}
  460. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  461. {$else not beos}
  462. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  463. {$endif not beos}
  464. inc(i,mblen);
  465. end;
  466. end;
  467. end;
  468. SetLength(result,resindex-1);
  469. end;
  470. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  471. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  472. var
  473. i, slen,
  474. destindex : SizeInt;
  475. len : longint;
  476. uch : UCS4Char;
  477. begin
  478. slen:=length(s);
  479. setlength(result,slen+1);
  480. i:=1;
  481. destindex:=0;
  482. while (i<=slen) do
  483. begin
  484. uch:=utf16toutf32(s,i,len);
  485. if (uch=UCS4Char(0)) then
  486. uch:=UCS4Char(32);
  487. result[destindex]:=uch;
  488. inc(destindex);
  489. inc(i,len);
  490. end;
  491. result[destindex]:=UCS4Char(0);
  492. { destindex <= slen }
  493. setlength(result,destindex+1);
  494. end;
  495. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  496. var
  497. hs1,hs2 : UCS4String;
  498. begin
  499. { wcscoll interprets null chars as end-of-string -> filter out }
  500. hs1:=WideStringToUCS4StringNoNulls(s1);
  501. hs2:=WideStringToUCS4StringNoNulls(s2);
  502. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  503. end;
  504. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  505. begin
  506. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  507. end;
  508. function CharLengthPChar(const Str: PChar): PtrInt;
  509. var
  510. nextlen: ptrint;
  511. s: pchar;
  512. {$ifndef beos}
  513. mbstate: mbstate_t;
  514. {$endif not beos}
  515. begin
  516. result:=0;
  517. s:=str;
  518. {$ifndef beos}
  519. fillchar(mbstate,sizeof(mbstate),0);
  520. {$endif not beos}
  521. repeat
  522. {$ifdef beos}
  523. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  524. {$else beos}
  525. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  526. {$endif beos}
  527. { skip invalid/incomplete sequences }
  528. if (nextlen<0) then
  529. nextlen:=1;
  530. inc(result,nextlen);
  531. inc(s,nextlen);
  532. until (nextlen=0);
  533. end;
  534. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  535. var
  536. nextlen: ptrint;
  537. {$ifndef beos}
  538. mbstate: mbstate_t;
  539. {$endif not beos}
  540. begin
  541. {$ifdef beos}
  542. result:=ptrint(mblen(str,maxlookahead));
  543. {$else beos}
  544. fillchar(mbstate,sizeof(mbstate),0);
  545. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  546. { mbrlen can also return -2 for "incomplete but potially valid character
  547. and data has been processed" }
  548. if result<0 then
  549. result:=-1;
  550. {$endif beos}
  551. end;
  552. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  553. var
  554. a,b: pchar;
  555. i: PtrInt;
  556. begin
  557. if not(canmodifys1) then
  558. getmem(a,len1+1)
  559. else
  560. a:=s1;
  561. for i:=0 to len1-1 do
  562. if s1[i]<>#0 then
  563. a[i]:=s1[i]
  564. else
  565. a[i]:=#32;
  566. a[len1]:=#0;
  567. if not(canmodifys2) then
  568. getmem(b,len2+1)
  569. else
  570. b:=s2;
  571. for i:=0 to len2-1 do
  572. if s2[i]<>#0 then
  573. b[i]:=s2[i]
  574. else
  575. b[i]:=#32;
  576. b[len2]:=#0;
  577. result:=strcoll(a,b);
  578. if not(canmodifys1) then
  579. freemem(a);
  580. if not(canmodifys2) then
  581. freemem(b);
  582. end;
  583. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  584. begin
  585. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  586. end;
  587. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  588. begin
  589. result:=strcoll(s1,s2);
  590. end;
  591. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  592. var
  593. a, b: AnsiString;
  594. begin
  595. a:=UpperAnsistring(s1);
  596. b:=UpperAnsistring(s2);
  597. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  598. end;
  599. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  600. begin
  601. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  602. end;
  603. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  604. var
  605. a, b: pchar;
  606. begin
  607. if (maxlen=0) then
  608. exit(0);
  609. if (s1[maxlen]<>#0) then
  610. begin
  611. getmem(a,maxlen+1);
  612. move(s1^,a^,maxlen);
  613. a[maxlen]:=#0;
  614. end
  615. else
  616. a:=s1;
  617. if (s2[maxlen]<>#0) then
  618. begin
  619. getmem(b,maxlen+1);
  620. move(s2^,b^,maxlen);
  621. b[maxlen]:=#0;
  622. end
  623. else
  624. b:=s2;
  625. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  626. if (a<>s1) then
  627. freemem(a);
  628. if (b<>s2) then
  629. freemem(b);
  630. end;
  631. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  632. var
  633. a, b: ansistring;
  634. begin
  635. if (maxlen=0) then
  636. exit(0);
  637. setlength(a,maxlen);
  638. move(s1^,a[1],maxlen);
  639. setlength(b,maxlen);
  640. move(s2^,b[1],maxlen);
  641. result:=AnsiCompareText(a,b);
  642. end;
  643. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  644. var
  645. newlen: sizeint;
  646. begin
  647. newlen:=length(s);
  648. if newlen>strlen(orgp) then
  649. fpc_rangeerror;
  650. p:=orgp;
  651. if (newlen>0) then
  652. move(s[1],p[0],newlen);
  653. p[newlen]:=#0;
  654. end;
  655. function AnsiStrLower(Str: PChar): PChar;
  656. var
  657. temp: ansistring;
  658. begin
  659. temp:=loweransistring(str);
  660. ansi2pchar(temp,str,result);
  661. end;
  662. function AnsiStrUpper(Str: PChar): PChar;
  663. var
  664. temp: ansistring;
  665. begin
  666. temp:=upperansistring(str);
  667. ansi2pchar(temp,str,result);
  668. end;
  669. procedure InitThread;
  670. begin
  671. {$if not(defined(darwin) and defined(arm))}
  672. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  673. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  674. {$else}
  675. { Unix locale settings are ignored on iPhoneOS }
  676. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  677. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  678. {$endif}
  679. end;
  680. procedure FiniThread;
  681. begin
  682. if (iconv_wide2ansi <> iconv_t(-1)) then
  683. iconv_close(iconv_wide2ansi);
  684. if (iconv_ansi2wide <> iconv_t(-1)) then
  685. iconv_close(iconv_ansi2wide);
  686. end;
  687. Procedure SetCWideStringManager;
  688. Var
  689. CWideStringManager : TUnicodeStringManager;
  690. begin
  691. CWideStringManager:=widestringmanager;
  692. With CWideStringManager do
  693. begin
  694. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  695. Ansi2WideMoveProc:=@Ansi2WideMove;
  696. UpperWideStringProc:=@UpperWideString;
  697. LowerWideStringProc:=@LowerWideString;
  698. CompareWideStringProc:=@CompareWideString;
  699. CompareTextWideStringProc:=@CompareTextWideString;
  700. CharLengthPCharProc:=@CharLengthPChar;
  701. CodePointLengthProc:=@CodePointLength;
  702. UpperAnsiStringProc:=@UpperAnsiString;
  703. LowerAnsiStringProc:=@LowerAnsiString;
  704. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  705. CompareTextAnsiStringProc:=@AnsiCompareText;
  706. StrCompAnsiStringProc:=@StrCompAnsi;
  707. StrICompAnsiStringProc:=@AnsiStrIComp;
  708. StrLCompAnsiStringProc:=@AnsiStrLComp;
  709. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  710. StrLowerAnsiStringProc:=@AnsiStrLower;
  711. StrUpperAnsiStringProc:=@AnsiStrUpper;
  712. ThreadInitProc:=@InitThread;
  713. ThreadFiniProc:=@FiniThread;
  714. { Unicode }
  715. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  716. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  717. UpperUnicodeStringProc:=@UpperWideString;
  718. LowerUnicodeStringProc:=@LowerWideString;
  719. CompareUnicodeStringProc:=@CompareWideString;
  720. CompareTextUnicodeStringProc:=@CompareTextWideString;
  721. end;
  722. SetUnicodeStringManager(CWideStringManager);
  723. end;
  724. initialization
  725. SetCWideStringManager;
  726. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  727. { with the information from the environment variables according to POSIX }
  728. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  729. setlocale(LC_ALL,'');
  730. { init conversion tables for main program }
  731. InitThread;
  732. finalization
  733. { fini conversion tables for main program }
  734. FiniThread;
  735. end.