cwstring.pp 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. {$linklib c}
  20. {$if not defined(linux) and not defined(solaris) and not defined(android)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  21. {$if defined(haiku)}
  22. {$linklib textencoding}
  23. {$linklib locale}
  24. {$else}
  25. {$linklib iconv}
  26. {$endif}
  27. {$define useiconv}
  28. {$endif linux}
  29. Uses
  30. BaseUnix,
  31. ctypes,
  32. unix,
  33. unixtype,
  34. initc,
  35. dynlibs;
  36. Const
  37. {$ifndef useiconv}
  38. libiconvname='c'; // is in libc under Linux.
  39. libprefix='lib';
  40. {$else}
  41. {$ifdef haiku}
  42. libiconvname='textencoding'; // is in libtextencoding under Haiku
  43. libprefix='lib';
  44. {$else}
  45. {$if defined(darwin) or defined(aix)}
  46. libiconvname='libiconv';
  47. libprefix='';
  48. {$else}
  49. libiconvname='iconv';
  50. libprefix='lib';
  51. {$endif}
  52. {$endif}
  53. {$endif}
  54. { helper functions from libc }
  55. function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
  56. function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
  57. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
  58. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
  59. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  60. {$ifndef beos}
  61. function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
  62. function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
  63. function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
  64. {$else beos}
  65. function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
  66. function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
  67. function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
  68. {$endif beos}
  69. const
  70. { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
  71. { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
  72. { that's a libc macro mapped to internal functions/variables }
  73. { and thus not a stable external API on systems where libc }
  74. { breaks backwards compatibility every now and then }
  75. MB_CUR_MAX = 10;
  76. {$ifndef android} // There is no iconv on Android
  77. const
  78. {$if defined(linux) or defined(Android)}
  79. __LC_CTYPE = 0;
  80. LC_ALL = 6;
  81. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  82. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  83. CODESET = _NL_CTYPE_CODESET_NAME;
  84. {$elseif defined(darwin)}
  85. CODESET = 0;
  86. LC_ALL = 0;
  87. {$elseif defined(FreeBSD)} // actually FreeBSD5. internationalisation is afaik not default on 4.
  88. __LC_CTYPE = 0;
  89. LC_ALL = 0;
  90. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  91. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  92. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  93. {$elseif defined(solaris)}
  94. {$define ACCEPT_646}
  95. CODESET=49;
  96. LC_ALL = 6;
  97. {$elseif defined(beos)}
  98. {$warning check correct value for BeOS}
  99. CODESET=49;
  100. {$ifdef haiku}
  101. LC_ALL = 0; // Checked for Haiku
  102. {$else}
  103. LC_ALL = 6; // Checked for BeOS
  104. {$endif}
  105. ESysEILSEQ = EILSEQ;
  106. {$elseif defined(OpenBSD)}
  107. {$define ACCEPT_646}
  108. CODESET = 51;
  109. LC_ALL = 0;
  110. {$elseif defined(NetBSD)}
  111. {$define ACCEPT_646}
  112. CODESET = 51;
  113. LC_ALL = 0;
  114. {$elseif defined(aix)}
  115. CODESET = 49;
  116. LC_ALL = -1;
  117. {$else not aix}
  118. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  119. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  120. {$endif}
  121. { unicode encoding name }
  122. {$ifdef FPC_LITTLE_ENDIAN}
  123. unicode_encoding2 = 'UTF-16LE';
  124. unicode_encoding4 = 'UCS-4LE';
  125. {$else FPC_LITTLE_ENDIAN}
  126. {$ifdef AIX}
  127. unicode_encoding2 = 'UTF-16';
  128. {$else AIX}
  129. unicode_encoding2 = 'UTF-16BE';
  130. unicode_encoding4 = 'UCS-4BE';
  131. {$endif AIX}
  132. {$endif FPC_LITTLE_ENDIAN}
  133. { Requests for iconvctl }
  134. ICONV_TRIVIALP = 0; // int *argument
  135. ICONV_GET_TRANSLITERATE = 1; // int *argument
  136. ICONV_SET_TRANSLITERATE = 2; // const int *argument
  137. ICONV_GET_DISCARD_ILSEQ = 3; // int *argument
  138. ICONV_SET_DISCARD_ILSEQ = 4; // const int *argument
  139. ICONV_SET_HOOKS = 5; // const struct iconv_hooks *argument
  140. ICONV_SET_FALLBACKS = 6; // const struct iconv_fallbacks *argument
  141. type
  142. piconv_t = ^iconv_t;
  143. iconv_t = pointer;
  144. nl_item = cint;
  145. {$ifdef haiku}
  146. function nl_langinfo(__item:nl_item):pchar;cdecl;external 'locale' name 'nl_langinfo';
  147. {$else}
  148. {$ifndef beos}
  149. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  150. {$endif}
  151. {$endif}
  152. {$if (not defined(bsd) and not defined(beos)) or (defined(darwin) and not defined(cpupowerpc32))}
  153. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  154. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  155. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  156. const
  157. iconvctlname='iconvctl';
  158. {$else}
  159. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  160. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  161. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  162. const
  163. iconvctlname='libiconvctl';
  164. {$endif}
  165. var
  166. iconvctl:function(__cd:iconv_t; __request:cint; __argument:pointer):cint;cdecl;
  167. threadvar
  168. iconv_ansi2wide,
  169. iconv_wide2ansi : iconv_t;
  170. { since we cache the iconv_t converters, we have to do the same
  171. for the DefaultSystemCodePage variable since if it changes, we
  172. have to re-initialize the converters too. We can't do that via
  173. a callback in the widestring manager because DefaultSystemCodePage
  174. is not a threadvar and we can't automatically change this in all
  175. threads }
  176. current_DefaultSystemCodePage: TSystemCodePage;
  177. {$i winiconv.inc}
  178. procedure InitThread;
  179. var
  180. transliterate: cint;
  181. iconvindex: longint;
  182. {$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
  183. iconvname: rawbytestring;
  184. {$endif}
  185. begin
  186. current_DefaultSystemCodePage:=DefaultSystemCodePage;
  187. {$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
  188. iconvindex:=win2iconv(DefaultSystemCodePage);
  189. if iconvindex<>-1 then
  190. iconvname:=win2iconv_arr[iconvindex].name
  191. else
  192. { default to UTF-8 on Unix platforms }
  193. iconvname:='UTF-8';
  194. iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
  195. iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
  196. {$else}
  197. { Unix locale settings are ignored on iPhoneOS/iPhoneSimulator }
  198. iconv_wide2ansi:=iconv_open('UTF-8',unicode_encoding2);
  199. iconv_ansi2wide:=iconv_open(unicode_encoding2,'UTF-8');
  200. {$endif}
  201. if assigned(iconvctl) and
  202. (iconv_wide2ansi<>iconv_t(-1)) then
  203. begin
  204. transliterate:=1;
  205. iconvctl(iconv_wide2ansi,ICONV_SET_TRANSLITERATE,@transliterate);
  206. end;
  207. end;
  208. procedure FiniThread;
  209. begin
  210. if (iconv_wide2ansi <> iconv_t(-1)) then
  211. iconv_close(iconv_wide2ansi);
  212. if (iconv_ansi2wide <> iconv_t(-1)) then
  213. iconv_close(iconv_ansi2wide);
  214. end;
  215. {$if defined(beos) and not defined(haiku)}
  216. function nl_langinfo(__item:nl_item):pchar;
  217. begin
  218. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  219. // Now implement the minimum required to correctly initialize WideString support
  220. case __item of
  221. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  222. else
  223. begin
  224. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  225. Result := '';
  226. end;
  227. end;
  228. end;
  229. {$endif}
  230. function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: pchar; cp_is_from: boolean): iconv_t;
  231. var
  232. iconvindex: longint;
  233. begin
  234. { TODO: add caching (then we also don't need separate code for
  235. the default system page and other ones)
  236. -- typecasting an ansistring function result to pchar is
  237. unsafe normally, but these are constant strings -> no
  238. problem }
  239. open_iconv_for_cps:=iconv_t(-1);
  240. iconvindex:=win2iconv(cp);
  241. if iconvindex=-1 then
  242. exit;
  243. repeat
  244. if cp_is_from then
  245. open_iconv_for_cps:=iconv_open(otherencoding,pchar(win2iconv_arr[iconvindex].name))
  246. else
  247. open_iconv_for_cps:=iconv_open(pchar(win2iconv_arr[iconvindex].name),otherencoding);
  248. inc(iconvindex);
  249. until (open_iconv_for_cps<>iconv_t(-1)) or
  250. (iconvindex>high(win2iconv_arr)) or
  251. (win2iconv_arr[iconvindex].cp<>cp);
  252. end;
  253. {$ifdef aix}
  254. {$i cwstraix.inc}
  255. {$endif aix}
  256. procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
  257. var
  258. outlength,
  259. outoffset,
  260. srclen,
  261. outleft : size_t;
  262. use_iconv: iconv_t;
  263. srcpos : pwidechar;
  264. destpos: pchar;
  265. mynil : pchar;
  266. my0 : size_t;
  267. err : longint;
  268. transliterate: cint;
  269. free_iconv: boolean;
  270. {$ifdef aix}
  271. intermediate: rawbytestring;
  272. {$endif aix}
  273. begin
  274. {$ifdef aix}
  275. { AIX libiconv does not support converting cp866 to anything else except
  276. for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
  277. if cp=866 then
  278. begin
  279. Wide2AnsiMove(source,intermediate,28595,len);
  280. if handle_aix_intermediate(pchar(intermediate),28595,cp,dest,len) then
  281. exit;
  282. end;
  283. {$endif aix}
  284. if (cp=DefaultSystemCodePage) then
  285. begin
  286. { update iconv converter in case the DefaultSystemCodePage has been
  287. changed }
  288. if current_DefaultSystemCodePage<>DefaultSystemCodePage then
  289. begin
  290. FiniThread;
  291. InitThread;
  292. end;
  293. use_iconv:=iconv_wide2ansi;
  294. free_iconv:=false;
  295. end
  296. else
  297. begin
  298. use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);
  299. if (use_iconv<>iconv_t(-1)) and
  300. assigned(iconvctl) then
  301. begin
  302. transliterate:=1;
  303. iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
  304. end;
  305. free_iconv:=true;
  306. end;
  307. { unsupported encoding -> default move }
  308. if use_iconv=iconv_t(-1) then
  309. begin
  310. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  311. exit;
  312. end;
  313. mynil:=nil;
  314. my0:=0;
  315. { rought estimation }
  316. setlength(dest,len*3);
  317. outlength:=len*3;
  318. srclen:=len*2;
  319. srcpos:=source;
  320. destpos:=pchar(dest);
  321. outleft:=outlength;
  322. while iconv(use_iconv,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  323. begin
  324. err:=fpgetCerrno;
  325. case err of
  326. { last character is incomplete sequence }
  327. ESysEINVAL,
  328. { incomplete sequence in the middle }
  329. ESysEILSEQ:
  330. begin
  331. { skip and set to '?' }
  332. inc(srcpos);
  333. dec(srclen,2);
  334. destpos^:='?';
  335. inc(destpos);
  336. dec(outleft);
  337. { reset }
  338. iconv(use_iconv,@mynil,@my0,@mynil,@my0);
  339. if err=ESysEINVAL then
  340. break;
  341. end;
  342. ESysE2BIG:
  343. begin
  344. outoffset:=destpos-pchar(dest);
  345. { extend }
  346. setlength(dest,outlength+len*3);
  347. inc(outleft,len*3);
  348. inc(outlength,len*3);
  349. { string could have been moved }
  350. destpos:=pchar(dest)+outoffset;
  351. end;
  352. else
  353. runerror(231);
  354. end;
  355. end;
  356. // truncate string
  357. setlength(dest,length(dest)-outleft);
  358. SetCodePage(dest,cp,false);
  359. if free_iconv then
  360. iconv_close(use_iconv);
  361. end;
  362. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  363. var
  364. outlength,
  365. outoffset,
  366. outleft : size_t;
  367. use_iconv: iconv_t;
  368. srcpos,
  369. destpos: pchar;
  370. mynil : pchar;
  371. my0 : size_t;
  372. err: cint;
  373. iconvindex: longint;
  374. free_iconv: boolean;
  375. {$ifdef aix}
  376. intermediate: rawbytestring;
  377. {$endif aix}
  378. begin
  379. {$ifdef aix}
  380. { AIX libiconv does not support converting cp866 to anything else except
  381. for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
  382. if cp=866 then
  383. begin
  384. if handle_aix_intermediate(source,cp,cp,intermediate,len) then
  385. source:=pchar(intermediate);
  386. end;
  387. {$endif aix}
  388. if (cp=DefaultSystemCodePage) then
  389. begin
  390. { update iconv converter in case the DefaultSystemCodePage has been
  391. changed }
  392. if current_DefaultSystemCodePage<>DefaultSystemCodePage then
  393. begin
  394. FiniThread;
  395. InitThread;
  396. end;
  397. use_iconv:=iconv_ansi2wide;
  398. free_iconv:=false;
  399. end
  400. else
  401. begin
  402. { TODO: add caching (then we also don't need separate code for
  403. the default system page and other ones)
  404. -- typecasting an ansistring function result to pchar is
  405. unsafe normally, but these are constant strings -> no
  406. problem }
  407. use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);
  408. free_iconv:=true;
  409. end;
  410. { unsupported encoding -> default move }
  411. if use_iconv=iconv_t(-1) then
  412. begin
  413. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  414. exit;
  415. end;
  416. mynil:=nil;
  417. my0:=0;
  418. // extra space
  419. outlength:=len+1;
  420. setlength(dest,outlength);
  421. srcpos:=source;
  422. destpos:=pchar(dest);
  423. outleft:=outlength*2;
  424. while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  425. begin
  426. err:=fpgetCerrno;
  427. case err of
  428. ESysEINVAL,
  429. ESysEILSEQ:
  430. begin
  431. { skip and set to '?' }
  432. inc(srcpos);
  433. dec(len);
  434. pwidechar(destpos)^:='?';
  435. inc(destpos,2);
  436. dec(outleft,2);
  437. { reset }
  438. iconv(use_iconv,@mynil,@my0,@mynil,@my0);
  439. if err=ESysEINVAL then
  440. break;
  441. end;
  442. ESysE2BIG:
  443. begin
  444. outoffset:=destpos-pchar(dest);
  445. { extend }
  446. setlength(dest,outlength+len);
  447. inc(outleft,len*2);
  448. inc(outlength,len);
  449. { string could have been moved }
  450. destpos:=pchar(dest)+outoffset;
  451. end;
  452. else
  453. runerror(231);
  454. end;
  455. end;
  456. // truncate string
  457. setlength(dest,length(dest)-outleft div 2);
  458. if free_iconv then
  459. iconv_close(use_iconv);
  460. end;
  461. {$else android}
  462. procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
  463. var
  464. i : SizeInt;
  465. hs : RawByteString;
  466. begin
  467. dest:='';
  468. if len = 0 then
  469. exit;
  470. if (cp = CP_UTF8) or (cp = CP_ACP) then
  471. begin
  472. // Only UTF-8 is supported for Android
  473. SetLength(hs,len*3);
  474. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,source,len);
  475. if i > 0 then
  476. begin
  477. SetLength(hs,i-1);
  478. dest:=hs;
  479. end;
  480. end
  481. else
  482. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  483. end;
  484. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  485. var
  486. i : SizeInt;
  487. hs : UnicodeString;
  488. begin
  489. // Only UTF-8 is supported for Android
  490. dest:='';
  491. if len = 0 then
  492. exit;
  493. if (cp = CP_UTF8) or (cp = CP_ACP) then
  494. begin
  495. SetLength(hs,len);
  496. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(source),len);
  497. if i>0 then
  498. begin
  499. SetLength(hs,i-1);
  500. dest:=hs;
  501. end
  502. else
  503. dest:='';
  504. end
  505. else
  506. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  507. end;
  508. {$endif android}
  509. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  510. function LowerWideString(const s : WideString) : WideString;
  511. var
  512. i : SizeInt;
  513. begin
  514. SetLength(result,length(s));
  515. for i:=0 to length(s)-1 do
  516. pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
  517. end;
  518. function UpperWideString(const s : WideString) : WideString;
  519. var
  520. i : SizeInt;
  521. begin
  522. SetLength(result,length(s));
  523. for i:=0 to length(s)-1 do
  524. pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
  525. end;
  526. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  527. begin
  528. if (len>length(s)) then
  529. if (length(s) < 10*256) then
  530. setlength(s,length(s)+10)
  531. else
  532. setlength(s,length(s)+length(s) shr 8);
  533. end;
  534. procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
  535. begin
  536. EnsureAnsiLen(s,index);
  537. pchar(@s[index])^:=c;
  538. inc(index);
  539. end;
  540. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  541. {$ifndef beos}
  542. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
  543. {$else not beos}
  544. procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
  545. {$endif beos}
  546. var
  547. p : pchar;
  548. mblen : size_t;
  549. begin
  550. { we know that s is unique -> avoid uniquestring calls}
  551. p:=@s[index];
  552. if (nc<=127) then
  553. ConcatCharToAnsiStr(char(nc),s,index)
  554. else
  555. begin
  556. EnsureAnsiLen(s,index+MB_CUR_MAX);
  557. {$ifndef beos}
  558. mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
  559. {$else not beos}
  560. mblen:=wctomb(p,wchar_t(nc));
  561. {$endif not beos}
  562. if (mblen<>size_t(-1)) then
  563. inc(index,mblen)
  564. else
  565. begin
  566. { invalid wide char }
  567. p^:='?';
  568. inc(index);
  569. end;
  570. end;
  571. end;
  572. function LowerAnsiString(const s : AnsiString) : AnsiString;
  573. var
  574. i, slen,
  575. resindex : SizeInt;
  576. mblen : size_t;
  577. {$ifndef beos}
  578. ombstate,
  579. nmbstate : mbstate_t;
  580. {$endif beos}
  581. wc : wchar_t;
  582. begin
  583. {$ifndef beos}
  584. fillchar(ombstate,sizeof(ombstate),0);
  585. fillchar(nmbstate,sizeof(nmbstate),0);
  586. {$endif beos}
  587. slen:=length(s);
  588. SetLength(result,slen+10);
  589. i:=1;
  590. resindex:=1;
  591. while (i<=slen) do
  592. begin
  593. if (s[i]<=#127) then
  594. begin
  595. wc:=wchar_t(s[i]);
  596. mblen:= 1;
  597. end
  598. else
  599. {$ifndef beos}
  600. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  601. {$else not beos}
  602. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  603. {$endif not beos}
  604. case mblen of
  605. size_t(-2):
  606. begin
  607. { partial invalid character, copy literally }
  608. while (i<=slen) do
  609. begin
  610. ConcatCharToAnsiStr(s[i],result,resindex);
  611. inc(i);
  612. end;
  613. end;
  614. size_t(-1), 0:
  615. begin
  616. { invalid or null character }
  617. ConcatCharToAnsiStr(s[i],result,resindex);
  618. inc(i);
  619. end;
  620. else
  621. begin
  622. { a valid sequence }
  623. { even if mblen = 1, the lowercase version may have a }
  624. { different length }
  625. { We can't do anything special if wchar_t is 16 bit... }
  626. {$ifndef beos}
  627. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
  628. {$else not beos}
  629. ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
  630. {$endif not beos}
  631. inc(i,mblen);
  632. end;
  633. end;
  634. end;
  635. SetLength(result,resindex-1);
  636. end;
  637. function UpperAnsiString(const s : AnsiString) : AnsiString;
  638. var
  639. i, slen,
  640. resindex : SizeInt;
  641. mblen : size_t;
  642. {$ifndef beos}
  643. ombstate,
  644. nmbstate : mbstate_t;
  645. {$endif beos}
  646. wc : wchar_t;
  647. begin
  648. {$ifndef beos}
  649. fillchar(ombstate,sizeof(ombstate),0);
  650. fillchar(nmbstate,sizeof(nmbstate),0);
  651. {$endif beos}
  652. slen:=length(s);
  653. SetLength(result,slen+10);
  654. i:=1;
  655. resindex:=1;
  656. while (i<=slen) do
  657. begin
  658. if (s[i]<=#127) then
  659. begin
  660. wc:=wchar_t(s[i]);
  661. mblen:= 1;
  662. end
  663. else
  664. {$ifndef beos}
  665. mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
  666. {$else not beos}
  667. mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
  668. {$endif beos}
  669. case mblen of
  670. size_t(-2):
  671. begin
  672. { partial invalid character, copy literally }
  673. while (i<=slen) do
  674. begin
  675. ConcatCharToAnsiStr(s[i],result,resindex);
  676. inc(i);
  677. end;
  678. end;
  679. size_t(-1), 0:
  680. begin
  681. { invalid or null character }
  682. ConcatCharToAnsiStr(s[i],result,resindex);
  683. inc(i);
  684. end;
  685. else
  686. begin
  687. { a valid sequence }
  688. { even if mblen = 1, the uppercase version may have a }
  689. { different length }
  690. { We can't do anything special if wchar_t is 16 bit... }
  691. {$ifndef beos}
  692. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
  693. {$else not beos}
  694. ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
  695. {$endif not beos}
  696. inc(i,mblen);
  697. end;
  698. end;
  699. end;
  700. SetLength(result,resindex-1);
  701. end;
  702. function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
  703. var
  704. i, slen,
  705. destindex : SizeInt;
  706. uch : UCS4Char;
  707. begin
  708. slen:=length(s);
  709. setlength(result,slen+1);
  710. i:=1;
  711. destindex:=0;
  712. while (i<=slen) do
  713. begin
  714. uch:=UCS4Char(s[i]);
  715. if (uch=0) then
  716. result[destindex]:=32
  717. else if (uch<=$d7ff) or (uch>=$e000) then
  718. result[destindex]:=uch
  719. else if (uch<=$dbff) and
  720. (i<slen) and
  721. (s[i+1]>=#$dc00) and
  722. (s[i+1]<=#$dfff) then
  723. begin
  724. result[destindex]:=(UCS4Char(uch-$d7c0) shl 10)+(UCS4Char(s[i+1]) xor $dc00);
  725. inc(i);
  726. end
  727. else { invalid surrogate pair }
  728. result[destindex]:=uch;
  729. inc(i);
  730. inc(destindex);
  731. end;
  732. result[destindex]:=UCS4Char(0);
  733. { Trimming length in this particular case is just a waste of time,
  734. because result will be interpreted as null-terminated and discarded
  735. almost immediately }
  736. end;
  737. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  738. {$if not(defined (aix) and defined(cpupowerpc32))}
  739. var
  740. hs1,hs2 : UCS4String;
  741. begin
  742. { wcscoll interprets null chars as end-of-string -> filter out }
  743. hs1:=WideStringToUCS4StringNoNulls(s1);
  744. hs2:=WideStringToUCS4StringNoNulls(s2);
  745. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  746. end;
  747. {$else}
  748. { AIX/PPC32 has a 16 bit wchar_t }
  749. var
  750. i, len: longint;
  751. hs1, hs2: array of widechar;
  752. begin
  753. len:=length(s1);
  754. setlength(hs1,len+1);
  755. for i:=1 to len do
  756. if s1[i]<>#0 then
  757. hs1[i-1]:=s1[i]
  758. else
  759. hs1[i-1]:=#32;
  760. hs1[len]:=#0;
  761. len:=length(s2);
  762. setlength(hs2,len+1);
  763. for i:=1 to len do
  764. if s2[i]<>#0 then
  765. hs2[i-1]:=s2[i]
  766. else
  767. hs2[i-1]:=#32;
  768. hs2[len]:=#0;
  769. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  770. end;
  771. {$endif}
  772. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  773. begin
  774. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  775. end;
  776. function CharLengthPChar(const Str: PChar): PtrInt;
  777. var
  778. nextlen: ptrint;
  779. s: pchar;
  780. {$ifndef beos}
  781. mbstate: mbstate_t;
  782. {$endif not beos}
  783. begin
  784. result:=0;
  785. s:=str;
  786. {$ifndef beos}
  787. fillchar(mbstate,sizeof(mbstate),0);
  788. {$endif not beos}
  789. repeat
  790. {$ifdef beos}
  791. nextlen:=ptrint(mblen(str,MB_CUR_MAX));
  792. {$else beos}
  793. nextlen:=ptrint(mbrlen(str,MB_CUR_MAX,@mbstate));
  794. {$endif beos}
  795. { skip invalid/incomplete sequences }
  796. if (nextlen<0) then
  797. nextlen:=1;
  798. inc(result,nextlen);
  799. inc(s,nextlen);
  800. until (nextlen=0);
  801. end;
  802. function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
  803. {$ifndef beos}
  804. var
  805. mbstate: mbstate_t;
  806. {$endif not beos}
  807. begin
  808. {$ifdef beos}
  809. result:=ptrint(mblen(str,maxlookahead));
  810. {$else beos}
  811. fillchar(mbstate,sizeof(mbstate),0);
  812. result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
  813. { mbrlen can also return -2 for "incomplete but potially valid character
  814. and data has been processed" }
  815. if result<0 then
  816. result:=-1;
  817. {$endif beos}
  818. end;
  819. function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
  820. var
  821. a,b: pchar;
  822. i: PtrInt;
  823. begin
  824. if not(canmodifys1) then
  825. getmem(a,len1+1)
  826. else
  827. a:=s1;
  828. for i:=0 to len1-1 do
  829. if s1[i]<>#0 then
  830. a[i]:=s1[i]
  831. else
  832. a[i]:=#32;
  833. a[len1]:=#0;
  834. if not(canmodifys2) then
  835. getmem(b,len2+1)
  836. else
  837. b:=s2;
  838. for i:=0 to len2-1 do
  839. if s2[i]<>#0 then
  840. b[i]:=s2[i]
  841. else
  842. b[i]:=#32;
  843. b[len2]:=#0;
  844. result:=strcoll(a,b);
  845. if not(canmodifys1) then
  846. freemem(a);
  847. if not(canmodifys2) then
  848. freemem(b);
  849. end;
  850. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  851. begin
  852. result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
  853. end;
  854. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  855. begin
  856. result:=strcoll(s1,s2);
  857. end;
  858. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  859. var
  860. a, b: AnsiString;
  861. begin
  862. a:=UpperAnsistring(s1);
  863. b:=UpperAnsistring(s2);
  864. result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
  865. end;
  866. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  867. begin
  868. result:=AnsiCompareText(ansistring(s1),ansistring(s2));
  869. end;
  870. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  871. var
  872. a, b: pchar;
  873. begin
  874. if (maxlen=0) then
  875. exit(0);
  876. if (s1[maxlen]<>#0) then
  877. begin
  878. getmem(a,maxlen+1);
  879. move(s1^,a^,maxlen);
  880. a[maxlen]:=#0;
  881. end
  882. else
  883. a:=s1;
  884. if (s2[maxlen]<>#0) then
  885. begin
  886. getmem(b,maxlen+1);
  887. move(s2^,b^,maxlen);
  888. b[maxlen]:=#0;
  889. end
  890. else
  891. b:=s2;
  892. result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
  893. if (a<>s1) then
  894. freemem(a);
  895. if (b<>s2) then
  896. freemem(b);
  897. end;
  898. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  899. var
  900. a, b: ansistring;
  901. begin
  902. if (maxlen=0) then
  903. exit(0);
  904. setlength(a,maxlen);
  905. move(s1^,a[1],maxlen);
  906. setlength(b,maxlen);
  907. move(s2^,b[1],maxlen);
  908. result:=AnsiCompareText(a,b);
  909. end;
  910. procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
  911. var
  912. newlen: sizeint;
  913. begin
  914. newlen:=length(s);
  915. if newlen>strlen(orgp) then
  916. fpc_rangeerror;
  917. p:=orgp;
  918. if (newlen>0) then
  919. move(s[1],p[0],newlen);
  920. p[newlen]:=#0;
  921. end;
  922. function AnsiStrLower(Str: PChar): PChar;
  923. var
  924. temp: ansistring;
  925. begin
  926. temp:=loweransistring(str);
  927. ansi2pchar(temp,str,result);
  928. end;
  929. function AnsiStrUpper(Str: PChar): PChar;
  930. var
  931. temp: ansistring;
  932. begin
  933. temp:=upperansistring(str);
  934. ansi2pchar(temp,str,result);
  935. end;
  936. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  937. {$ifndef android}
  938. var
  939. langinfo: pchar;
  940. {$endif android}
  941. begin
  942. {$ifdef android}
  943. Result := CP_UTF8; // Android has only UTF-8
  944. {$else}
  945. langinfo:=nl_langinfo(CODESET);
  946. { there's a bug in the Mac OS X 10.5 libc (based on FreeBSD's)
  947. that causes it to return an empty string of UTF-8 locales
  948. -> patch up (and in general, UTF-8 is a good default on
  949. Unix platforms) }
  950. if not assigned(langinfo) or
  951. (langinfo^=#0) then
  952. langinfo:='UTF-8';
  953. Result := iconv2win(ansistring(langinfo));
  954. {$endif android}
  955. end;
  956. {$ifdef FPC_HAS_CPSTRING}
  957. {$i textrec.inc}
  958. procedure SetStdIOCodePage(var T: Text); inline;
  959. begin
  960. case TextRec(T).Mode of
  961. fmInput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleInput);
  962. fmOutput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleOutput);
  963. end;
  964. end;
  965. procedure SetStdIOCodePages; inline;
  966. begin
  967. SetStdIOCodePage(Input);
  968. SetStdIOCodePage(Output);
  969. SetStdIOCodePage(ErrOutput);
  970. SetStdIOCodePage(StdOut);
  971. SetStdIOCodePage(StdErr);
  972. end;
  973. {$endif FPC_HAS_CPSTRING}
  974. Procedure SetCWideStringManager;
  975. Var
  976. CWideStringManager : TUnicodeStringManager;
  977. begin
  978. CWideStringManager:=widestringmanager;
  979. With CWideStringManager do
  980. begin
  981. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  982. Ansi2WideMoveProc:=@Ansi2WideMove;
  983. UpperWideStringProc:=@UpperWideString;
  984. LowerWideStringProc:=@LowerWideString;
  985. CompareWideStringProc:=@CompareWideString;
  986. CompareTextWideStringProc:=@CompareTextWideString;
  987. CharLengthPCharProc:=@CharLengthPChar;
  988. CodePointLengthProc:=@CodePointLength;
  989. UpperAnsiStringProc:=@UpperAnsiString;
  990. LowerAnsiStringProc:=@LowerAnsiString;
  991. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  992. CompareTextAnsiStringProc:=@AnsiCompareText;
  993. StrCompAnsiStringProc:=@StrCompAnsi;
  994. StrICompAnsiStringProc:=@AnsiStrIComp;
  995. StrLCompAnsiStringProc:=@AnsiStrLComp;
  996. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  997. StrLowerAnsiStringProc:=@AnsiStrLower;
  998. StrUpperAnsiStringProc:=@AnsiStrUpper;
  999. {$ifdef android}
  1000. ThreadInitProc:=nil;
  1001. ThreadFiniProc:=nil;
  1002. {$else}
  1003. ThreadInitProc:=@InitThread;
  1004. ThreadFiniProc:=@FiniThread;
  1005. {$endif android}
  1006. { Unicode }
  1007. Unicode2AnsiMoveProc:=@Wide2AnsiMove;
  1008. Ansi2UnicodeMoveProc:=@Ansi2WideMove;
  1009. UpperUnicodeStringProc:=@UpperWideString;
  1010. LowerUnicodeStringProc:=@LowerWideString;
  1011. CompareUnicodeStringProc:=@CompareWideString;
  1012. CompareTextUnicodeStringProc:=@CompareTextWideString;
  1013. { CodePage }
  1014. GetStandardCodePageProc:=@GetStandardCodePage;
  1015. end;
  1016. SetUnicodeStringManager(CWideStringManager);
  1017. end;
  1018. {$ifndef android}
  1019. var
  1020. iconvlib:TLibHandle;
  1021. {$endif android}
  1022. initialization
  1023. SetCWideStringManager;
  1024. {$ifndef android}
  1025. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  1026. { with the information from the environment variables according to POSIX }
  1027. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  1028. setlocale(LC_ALL,'');
  1029. { load iconvctl function }
  1030. iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix);
  1031. if iconvlib<>0 then
  1032. pointer(iconvctl):=GetProcAddress(iconvlib,iconvctlname);
  1033. {$endif android}
  1034. { set the DefaultSystemCodePage }
  1035. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  1036. {$ifdef FPC_HAS_CPSTRING}
  1037. SetStdIOCodePages;
  1038. {$endif FPC_HAS_CPSTRING}
  1039. {$ifndef android}
  1040. { init conversion tables for main program }
  1041. InitThread;
  1042. {$endif android}
  1043. finalization
  1044. {$ifndef android}
  1045. { fini conversion tables for main program }
  1046. FiniThread;
  1047. { unload iconv library }
  1048. if iconvlib<>0 then
  1049. FreeLibrary(iconvlib);
  1050. {$endif android}
  1051. end.