cwstring.pp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. unit cwstring;
  15. interface
  16. procedure SetCWidestringManager;
  17. implementation
  18. {$linklib c}
  19. // Linux (and maybe glibc platforms in general), have iconv in glibc.
  20. {$if defined(linux) or defined(solaris)}
  21. {$define iconv_is_in_libc}
  22. {$endif}
  23. {$ifdef netbsd}
  24. {$ifdef TEST_ICONV_LIBC}
  25. {$define iconv_is_in_libc}
  26. {$endif}
  27. {$endif}
  28. {$ifndef iconv_is_in_libc}
  29. {$if defined(haiku)}
  30. {$linklib textencoding}
  31. {$else}
  32. {$linklib iconv}
  33. {$endif}
  34. {$define useiconv}
  35. {$endif not iconv_is_in_libc}
  36. Uses
  37. BaseUnix,
  38. ctypes,
  39. unix,
  40. unixtype,
  41. initc;
  42. Const
  43. {$ifndef useiconv}
  44. libiconvname='c'; // is in libc under Linux.
  45. {$else}
  46. {$ifdef haiku}
  47. libiconvname='textencoding'; // is in libtextencoding under Haiku
  48. {$else}
  49. libiconvname='iconv';
  50. {$endif}
  51. {$endif}
  52. { helper functions from libc }
  53. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  54. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  55. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  56. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  57. {$ifdef netbsd}
  58. { NetBSD has a new setlocale function defined in /usr/include/locale.h
  59. that should be used }
  60. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name '__setlocale_mb_len_max_32';
  61. {$else}
  62. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  63. {$endif}
  64. {$ifndef beos}
  65. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  66. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  67. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  68. {$else beos}
  69. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  70. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  71. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  72. {$endif beos}
  73. const
  74. {$ifdef linux}
  75. __LC_CTYPE = 0;
  76. LC_ALL = 6;
  77. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  78. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  79. CODESET = _NL_CTYPE_CODESET_NAME;
  80. {$else linux}
  81. {$ifdef darwin}
  82. CODESET = 0;
  83. LC_ALL = 0;
  84. {$else darwin}
  85. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  86. __LC_CTYPE = 0;
  87. LC_ALL = 0;
  88. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  89. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  90. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  91. {$else freebsd}
  92. {$ifdef solaris}
  93. CODESET=49;
  94. LC_ALL = 6;
  95. {$else solaris}
  96. {$ifdef beos}
  97. {$warning check correct value for BeOS}
  98. CODESET=49;
  99. {$ifdef haiku}
  100. LC_ALL = 0; // Checked for Haiku
  101. {$else}
  102. LC_ALL = 6; // Checked for BeOS
  103. {$endif}
  104. ESysEILSEQ = EILSEQ;
  105. {$else}
  106. {$ifdef OpenBSD}
  107. CODESET = 51;
  108. LC_ALL = 0;
  109. {$else not OpenBSD}
  110. {$ifdef NetBSD}
  111. CODESET = 51;
  112. LC_ALL = 0;
  113. {$else not NetBSD}
  114. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  115. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  116. {$endif NetBSD}
  117. {$endif OpenBSD}
  118. {$endif beos}
  119. {$endif solaris}
  120. {$endif FreeBSD}
  121. {$endif darwin}
  122. {$endif linux}
  123. { unicode encoding name }
  124. {$ifdef FPC_LITTLE_ENDIAN}
  125. unicode_encoding2 = 'UTF-16LE';
  126. unicode_encoding4 = 'UCS-4LE';
  127. {$else FPC_LITTLE_ENDIAN}
  128. unicode_encoding2 = 'UTF-16BE';
  129. unicode_encoding4 = 'UCS-4BE';
  130. {$endif FPC_LITTLE_ENDIAN}
  131. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  132. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  133. { that's a libc macro mapped to internal functions/variables }
  134. { and thus not a stable external API on systems where libc }
  135. { breaks backwards compatibility every now and then }
  136. MB_CUR_MAX = 10;
  137. type
  138. piconv_t = ^iconv_t;
  139. iconv_t = pointer;
  140. nl_item = cint;
  141. {$ifdef haiku}
  142. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'root' name 'nl_langinfo';
  143. {$else}
  144. {$ifndef beos}
  145. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  146. {$endif}
  147. {$endif}
  148. {$if (not defined(bsd) and not defined(beos)) or defined(iconv_is_in_libc) or (defined(darwin) and not defined(cpupowerpc32))}
  149. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  150. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  151. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  152. {$else}
  153. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  154. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  155. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  156. {$endif}
  157. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  158. threadvar
  159. iconv_ansi2wide,
  160. iconv_wide2ansi : iconv_t;
  161. {$if defined(beos) and not defined(haiku)}
  162. function nl_langinfo(__item:nl_item):pchar;
  163. begin
  164. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  165. // Now implement the minimum required to correctly initialize WideString support
  166. case __item of
  167. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  168. else
  169. begin
  170. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  171. Result := '';
  172. end;
  173. end;
  174. end;
  175. {$endif}
  176. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  177. var
  178. outlength,
  179. outoffset,
  180. srclen,
  181. outleft : size_t;
  182. srcpos : pwidechar;
  183. destpos: pchar;
  184. mynil : pchar;
  185. my0 : size_t;
  186. err: cint;
  187. begin
  188. {$ifndef VER2_2}
  189. if PtrInt(iconv_wide2ansi)=-1 then
  190. begin
  191. DefaultUnicode2AnsiMove(source,dest,len);
  192. exit;
  193. end;
  194. {$endif VER2_2}
  195. mynil:=nil;
  196. my0:=0;
  197. { rought estimation }
  198. setlength(dest,len*3);
  199. outlength:=len*3;
  200. srclen:=len*2;
  201. srcpos:=source;
  202. destpos:=pchar(dest);
  203. outleft:=outlength;
  204. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  205. begin
  206. err:=fpgetCerrno;
  207. case err of
  208. { last character is incomplete sequence }
  209. ESysEINVAL,
  210. { incomplete sequence in the middle }
  211. ESysEILSEQ:
  212. begin
  213. { skip and set to '?' }
  214. inc(srcpos);
  215. dec(srclen,2);
  216. destpos^:='?';
  217. inc(destpos);
  218. dec(outleft);
  219. { reset }
  220. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  221. if err=ESysEINVAL then
  222. break;
  223. end;
  224. ESysE2BIG:
  225. begin
  226. outoffset:=destpos-pchar(dest);
  227. { extend }
  228. setlength(dest,outlength+len*3);
  229. inc(outleft,len*3);
  230. inc(outlength,len*3);
  231. { string could have been moved }
  232. destpos:=pchar(dest)+outoffset;
  233. end;
  234. else
  235. runerror(231);
  236. end;
  237. end;
  238. // truncate string
  239. setlength(dest,length(dest)-outleft);
  240. end;
  241. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  242. var
  243. outlength,
  244. outoffset,
  245. outleft : size_t;
  246. srcpos,
  247. destpos: pchar;
  248. mynil : pchar;
  249. my0 : size_t;
  250. err: cint;
  251. begin
  252. {$ifndef VER2_2}
  253. if PtrInt(iconv_ansi2wide)=-1 then
  254. begin
  255. DefaultAnsi2UnicodeMove(source,dest,len);
  256. exit;
  257. end;
  258. {$endif VER2_2}
  259. mynil:=nil;
  260. my0:=0;
  261. // extra space
  262. outlength:=len+1;
  263. setlength(dest,outlength);
  264. srcpos:=source;
  265. destpos:=pchar(dest);
  266. outleft:=outlength*2;
  267. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  268. begin
  269. err:=fpgetCerrno;
  270. case err of
  271. ESysEINVAL,
  272. ESysEILSEQ:
  273. begin
  274. { skip and set to '?' }
  275. inc(srcpos);
  276. dec(len);
  277. pwidechar(destpos)^:='?';
  278. inc(destpos,2);
  279. dec(outleft,2);
  280. { reset }
  281. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  282. if err=ESysEINVAL then
  283. break;
  284. end;
  285. ESysE2BIG:
  286. begin
  287. outoffset:=destpos-pchar(dest);
  288. { extend }
  289. setlength(dest,outlength+len);
  290. inc(outleft,len*2);
  291. inc(outlength,len);
  292. { string could have been moved }
  293. destpos:=pchar(dest)+outoffset;
  294. end;
  295. else
  296. runerror(231);
  297. end;
  298. end;
  299. // truncate string
  300. setlength(dest,length(dest)-outleft div 2);
  301. end;
  302. function LowerWideString(const s : WideString) : WideString;
  303. var
  304. i : SizeInt;
  305. begin
  306. SetLength(result,length(s));
  307. for i:=0 to length(s)-1 do
  308. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  309. end;
  310. function UpperWideString(const s : WideString) : WideString;
  311. var
  312. i : SizeInt;
  313. begin
  314. SetLength(result,length(s));
  315. for i:=0 to length(s)-1 do
  316. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  317. end;
  318. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  319. begin
  320. if (len>length(s)) then
  321. if (length(s) < 10*256) then
  322. setlength(s,length(s)+10)
  323. else
  324. setlength(s,length(s)+length(s) shr 8);
  325. end;
  326. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  327. begin
  328. EnsureAnsiLen(s,index);
  329. pchar(@s[index])^:=c;
  330. inc(index);
  331. end;
  332. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  333. {$ifndef beos}
  334. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  335. {$else not beos}
  336. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  337. {$endif beos}
  338. var
  339. p : pchar;
  340. mblen : size_t;
  341. begin
  342. { we know that s is unique -> avoid uniquestring calls}
  343. p:=@s[index];
  344. if (nc<=127) then
  345. ConcatCharToAnsiStr(char(nc),s,index)
  346. else
  347. begin
  348. EnsureAnsiLen(s,index+MB_CUR_MAX);
  349. {$ifndef beos}
  350. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  351. {$else not beos}
  352. mblen:=wctomb(p,wchar_t(nc));
  353. {$endif not beos}
  354. if (mblen<>size_t(-1)) then
  355. inc(index,mblen)
  356. else
  357. begin
  358. { invalid wide char }
  359. p^:='?';
  360. inc(index);
  361. end;
  362. end;
  363. end;
  364. function LowerAnsiString(const s : AnsiString) : AnsiString;
  365. var
  366. i, slen,
  367. resindex : SizeInt;
  368. mblen : size_t;
  369. {$ifndef beos}
  370. ombstate,
  371. nmbstate : mbstate_t;
  372. {$endif beos}
  373. wc : wchar_t;
  374. begin
  375. {$ifndef beos}
  376. fillchar(ombstate,sizeof(ombstate),0);
  377. fillchar(nmbstate,sizeof(nmbstate),0);
  378. {$endif beos}
  379. slen:=length(s);
  380. SetLength(result,slen+10);
  381. i:=1;
  382. resindex:=1;
  383. while (i<=slen) do
  384. begin
  385. if (s[i]<=#127) then
  386. begin
  387. wc:=wchar_t(s[i]);
  388. mblen:= 1;
  389. end
  390. else
  391. {$ifndef beos}
  392. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  393. {$else not beos}
  394. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  395. {$endif not beos}
  396. case mblen of
  397. size_t(-2):
  398. begin
  399. { partial invalid character, copy literally }
  400. while (i<=slen) do
  401. begin
  402. ConcatCharToAnsiStr(s[i],result,resindex);
  403. inc(i);
  404. end;
  405. end;
  406. size_t(-1), 0:
  407. begin
  408. { invalid or null character }
  409. ConcatCharToAnsiStr(s[i],result,resindex);
  410. inc(i);
  411. end;
  412. else
  413. begin
  414. { a valid sequence }
  415. { even if mblen = 1, the lowercase version may have a }
  416. { different length }
  417. { We can't do anything special if wchar_t is 16 bit... }
  418. {$ifndef beos}
  419. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  420. {$else not beos}
  421. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  422. {$endif not beos}
  423. inc(i,mblen);
  424. end;
  425. end;
  426. end;
  427. SetLength(result,resindex-1);
  428. end;
  429. function UpperAnsiString(const s : AnsiString) : AnsiString;
  430. var
  431. i, slen,
  432. resindex : SizeInt;
  433. mblen : size_t;
  434. {$ifndef beos}
  435. ombstate,
  436. nmbstate : mbstate_t;
  437. {$endif beos}
  438. wc : wchar_t;
  439. begin
  440. {$ifndef beos}
  441. fillchar(ombstate,sizeof(ombstate),0);
  442. fillchar(nmbstate,sizeof(nmbstate),0);
  443. {$endif beos}
  444. slen:=length(s);
  445. SetLength(result,slen+10);
  446. i:=1;
  447. resindex:=1;
  448. while (i<=slen) do
  449. begin
  450. if (s[i]<=#127) then
  451. begin
  452. wc:=wchar_t(s[i]);
  453. mblen:= 1;
  454. end
  455. else
  456. {$ifndef beos}
  457. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  458. {$else not beos}
  459. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  460. {$endif beos}
  461. case mblen of
  462. size_t(-2):
  463. begin
  464. { partial invalid character, copy literally }
  465. while (i<=slen) do
  466. begin
  467. ConcatCharToAnsiStr(s[i],result,resindex);
  468. inc(i);
  469. end;
  470. end;
  471. size_t(-1), 0:
  472. begin
  473. { invalid or null character }
  474. ConcatCharToAnsiStr(s[i],result,resindex);
  475. inc(i);
  476. end;
  477. else
  478. begin
  479. { a valid sequence }
  480. { even if mblen = 1, the uppercase version may have a }
  481. { different length }
  482. { We can't do anything special if wchar_t is 16 bit... }
  483. {$ifndef beos}
  484. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  485. {$else not beos}
  486. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  487. {$endif not beos}
  488. inc(i,mblen);
  489. end;
  490. end;
  491. end;
  492. SetLength(result,resindex-1);
  493. end;
  494. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; external name 'FPC_UTF16TOUTF32';
  495. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  496. var
  497. i, slen,
  498. destindex : SizeInt;
  499. len : longint;
  500. uch : UCS4Char;
  501. begin
  502. slen:=length(s);
  503. setlength(result,slen+1);
  504. i:=1;
  505. destindex:=0;
  506. while (i<=slen) do
  507. begin
  508. uch:=utf16toutf32(s,i,len);
  509. if (uch=UCS4Char(0)) then
  510. uch:=UCS4Char(32);
  511. result[destindex]:=uch;
  512. inc(destindex);
  513. inc(i,len);
  514. end;
  515. result[destindex]:=UCS4Char(0);
  516. { destindex <= slen }
  517. setlength(result,destindex+1);
  518. end;
  519. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  520. var
  521. hs1,hs2 : UCS4String;
  522. begin
  523. { wcscoll interprets null chars as end-of-string -> filter out }
  524. hs1:=WideStringToUCS4StringNoNulls(s1);
  525. hs2:=WideStringToUCS4StringNoNulls(s2);
  526. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  527. end;
  528. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  529. begin
  530. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  531. end;
  532. function CharLengthPChar(const Str: PChar): PtrInt;
  533. var
  534. nextlen: ptrint;
  535. s: pchar;
  536. {$ifndef beos}
  537. mbstate: mbstate_t;
  538. {$endif not beos}
  539. begin
  540. result:=0;
  541. s:=str;
  542. {$ifndef beos}
  543. fillchar(mbstate,sizeof(mbstate),0);
  544. {$endif not beos}
  545. repeat
  546. {$ifdef beos}
  547. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  548. {$else beos}
  549. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  550. {$endif beos}
  551. { skip invalid/incomplete sequences }
  552. if (nextlen<0) then
  553. nextlen:=1;
  554. inc(result,nextlen);
  555. inc(s,nextlen);
  556. until (nextlen=0);
  557. end;
  558. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  559. var
  560. nextlen: ptrint;
  561. {$ifndef beos}
  562. mbstate: mbstate_t;
  563. {$endif not beos}
  564. begin
  565. {$ifdef beos}
  566. result:=ptrint(mblen(str,maxlookahead));
  567. {$else beos}
  568. fillchar(mbstate,sizeof(mbstate),0);
  569. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  570. { mbrlen can also return -2 for "incomplete but potially valid character
  571. and data has been processed" }
  572. if result<0 then
  573. result:=-1;
  574. {$endif beos}
  575. end;
  576. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  577. var
  578. a,b: pchar;
  579. i: PtrInt;
  580. begin
  581. if not(canmodifys1) then
  582. getmem(a,len1+1)
  583. else
  584. a:=s1;
  585. for i:=0 to len1-1 do
  586. if s1[i]<>#0 then
  587. a[i]:=s1[i]
  588. else
  589. a[i]:=#32;
  590. a[len1]:=#0;
  591. if not(canmodifys2) then
  592. getmem(b,len2+1)
  593. else
  594. b:=s2;
  595. for i:=0 to len2-1 do
  596. if s2[i]<>#0 then
  597. b[i]:=s2[i]
  598. else
  599. b[i]:=#32;
  600. b[len2]:=#0;
  601. result:=strcoll(a,b);
  602. if not(canmodifys1) then
  603. freemem(a);
  604. if not(canmodifys2) then
  605. freemem(b);
  606. end;
  607. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  608. begin
  609. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  610. end;
  611. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  612. begin
  613. result:=strcoll(s1,s2);
  614. end;
  615. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  616. var
  617. a, b: AnsiString;
  618. begin
  619. a:=UpperAnsistring(s1);
  620. b:=UpperAnsistring(s2);
  621. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  622. end;
  623. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  624. begin
  625. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  626. end;
  627. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  628. var
  629. a, b: pchar;
  630. begin
  631. if (maxlen=0) then
  632. exit(0);
  633. if (s1[maxlen]<>#0) then
  634. begin
  635. getmem(a,maxlen+1);
  636. move(s1^,a^,maxlen);
  637. a[maxlen]:=#0;
  638. end
  639. else
  640. a:=s1;
  641. if (s2[maxlen]<>#0) then
  642. begin
  643. getmem(b,maxlen+1);
  644. move(s2^,b^,maxlen);
  645. b[maxlen]:=#0;
  646. end
  647. else
  648. b:=s2;
  649. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  650. if (a<>s1) then
  651. freemem(a);
  652. if (b<>s2) then
  653. freemem(b);
  654. end;
  655. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  656. var
  657. a, b: ansistring;
  658. begin
  659. if (maxlen=0) then
  660. exit(0);
  661. setlength(a,maxlen);
  662. move(s1^,a[1],maxlen);
  663. setlength(b,maxlen);
  664. move(s2^,b[1],maxlen);
  665. result:=AnsiCompareText(a,b);
  666. end;
  667. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  668. var
  669. newlen: sizeint;
  670. begin
  671. newlen:=length(s);
  672. if newlen>strlen(orgp) then
  673. fpc_rangeerror;
  674. p:=orgp;
  675. if (newlen>0) then
  676. move(s[1],p[0],newlen);
  677. p[newlen]:=#0;
  678. end;
  679. function AnsiStrLower(Str: PChar): PChar;
  680. var
  681. temp: ansistring;
  682. begin
  683. temp:=loweransistring(str);
  684. ansi2pchar(temp,str,result);
  685. end;
  686. function AnsiStrUpper(Str: PChar): PChar;
  687. var
  688. temp: ansistring;
  689. begin
  690. temp:=upperansistring(str);
  691. ansi2pchar(temp,str,result);
  692. end;
  693. procedure InitThread;
  694. begin
  695. {$if not(defined(darwin) and defined(arm)) and not defined(iphonesim)}
  696. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  697. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  698. {$else}
  699. { Unix locale settings are ignored on iPhoneOS }
  700. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  701. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  702. {$endif}
  703. end;
  704. procedure FiniThread;
  705. begin
  706. if (iconv_wide2ansi <> iconv_t(-1)) then
  707. iconv_close(iconv_wide2ansi);
  708. if (iconv_ansi2wide <> iconv_t(-1)) then
  709. iconv_close(iconv_ansi2wide);
  710. end;
  711. Procedure SetCWideStringManager;
  712. Var
  713. CWideStringManager : TUnicodeStringManager;
  714. begin
  715. CWideStringManager:=widestringmanager;
  716. With CWideStringManager do
  717. begin
  718. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  719. Ansi2WideMoveProc:=@Ansi2WideMove;
  720. UpperWideStringProc:=@UpperWideString;
  721. LowerWideStringProc:=@LowerWideString;
  722. CompareWideStringProc:=@CompareWideString;
  723. CompareTextWideStringProc:=@CompareTextWideString;
  724. CharLengthPCharProc:=@CharLengthPChar;
  725. CodePointLengthProc:=@CodePointLength;
  726. UpperAnsiStringProc:=@UpperAnsiString;
  727. LowerAnsiStringProc:=@LowerAnsiString;
  728. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  729. CompareTextAnsiStringProc:=@AnsiCompareText;
  730. StrCompAnsiStringProc:=@StrCompAnsi;
  731. StrICompAnsiStringProc:=@AnsiStrIComp;
  732. StrLCompAnsiStringProc:=@AnsiStrLComp;
  733. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  734. StrLowerAnsiStringProc:=@AnsiStrLower;
  735. StrUpperAnsiStringProc:=@AnsiStrUpper;
  736. ThreadInitProc:=@InitThread;
  737. ThreadFiniProc:=@FiniThread;
  738. { Unicode }
  739. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  740. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  741. UpperUnicodeStringProc:=@UpperWideString;
  742. LowerUnicodeStringProc:=@LowerWideString;
  743. CompareUnicodeStringProc:=@CompareWideString;
  744. CompareTextUnicodeStringProc:=@CompareTextWideString;
  745. end;
  746. SetUnicodeStringManager(CWideStringManager);
  747. end;
  748. initialization
  749. SetCWideStringManager;
  750. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  751. { with the information from the environment variables according to POSIX }
  752. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  753. setlocale(LC_ALL,'');
  754. { init conversion tables for main program }
  755. InitThread;
  756. finalization
  757. { fini conversion tables for main program }
  758. FiniThread;
  759. end.