cwstring.pp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$define useiconv}
  21. {$endif linux}
  22. Uses
  23. BaseUnix,
  24. ctypes,
  25. unix,
  26. unixtype,
  27. initc;
  28. Const
  29. {$ifndef useiconv}
  30. libiconvname='c'; // is in libc under Linux.
  31. {$else}
  32. libiconvname='iconv';
  33. {$endif}
  34. { helper functions from libc }
  35. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  36. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  37. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  38. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  39. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  40. const
  41. {$ifdef linux}
  42. __LC_CTYPE = 0;
  43. LC_ALL = 6;
  44. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  45. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  46. CODESET = _NL_CTYPE_CODESET_NAME;
  47. {$else linux}
  48. {$ifdef darwin}
  49. CODESET = 0;
  50. LC_ALL = 0;
  51. {$else darwin}
  52. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  53. __LC_CTYPE = 0;
  54. LC_ALL = 0;
  55. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  56. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  57. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  58. {$else freebsd}
  59. {$ifdef solaris}
  60. CODESET=49;
  61. LC_ALL = 6;
  62. {$else solaris}
  63. {$ifdef beos}
  64. {$warning check correct value for BeOS}
  65. CODESET=49;
  66. LC_ALL = 6; // Checked for BeOS, but 0 under Haiku...
  67. ESysEILSEQ = EILSEQ;
  68. {$else}
  69. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  70. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  71. {$endif beos}
  72. {$endif solaris}
  73. {$endif FreeBSD}
  74. {$endif darwin}
  75. {$endif linux}
  76. { unicode encoding name }
  77. {$ifdef FPC_LITTLE_ENDIAN}
  78. unicode_encoding2 = 'UTF-16LE';
  79. unicode_encoding4 = 'UCS-4LE';
  80. {$else FPC_LITTLE_ENDIAN}
  81. unicode_encoding2 = 'UTF-16BE';
  82. unicode_encoding4 = 'UCS-4BE';
  83. {$endif FPC_LITTLE_ENDIAN}
  84. type
  85. piconv_t = ^iconv_t;
  86. iconv_t = pointer;
  87. nl_item = cint;
  88. {$ifndef beos}
  89. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  90. {$endif}
  91. {$if (not defined(bsd) and not defined(beos)) or defined(darwin)}
  92. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  93. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  94. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  95. {$else}
  96. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  97. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  98. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  99. {$endif}
  100. threadvar
  101. iconv_ansi2ucs4,
  102. iconv_ucs42ansi,
  103. iconv_ansi2wide,
  104. iconv_wide2ansi : iconv_t;
  105. {$ifdef beos}
  106. function nl_langinfo(__item:nl_item):pchar;
  107. begin
  108. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  109. // Now implement the minimum required to correctly initialize WideString support
  110. case __item of
  111. CODESET : Result := 'UTF-8'; // BeOS use UTF-8
  112. else
  113. begin
  114. Assert(False, 'nl_langinfo was called with an unknown nl_item value');
  115. Result := '';
  116. end;
  117. end;
  118. end;
  119. {$endif}
  120. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  121. var
  122. outlength,
  123. outoffset,
  124. srclen,
  125. outleft : size_t;
  126. srcpos : pwidechar;
  127. destpos: pchar;
  128. mynil : pchar;
  129. my0 : size_t;
  130. err: cint;
  131. begin
  132. mynil:=nil;
  133. my0:=0;
  134. { rought estimation }
  135. setlength(dest,len*3);
  136. outlength:=len*3;
  137. srclen:=len*2;
  138. srcpos:=source;
  139. destpos:=pchar(dest);
  140. outleft:=outlength;
  141. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  142. begin
  143. err:=fpgetCerrno;
  144. case err of
  145. { last character is incomplete sequence }
  146. ESysEINVAL,
  147. { incomplete sequence in the middle }
  148. ESysEILSEQ:
  149. begin
  150. { skip and set to '?' }
  151. inc(srcpos);
  152. dec(srclen,2);
  153. destpos^:='?';
  154. inc(destpos);
  155. dec(outleft);
  156. { reset }
  157. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  158. if err=ESysEINVAL then
  159. break;
  160. end;
  161. ESysE2BIG:
  162. begin
  163. outoffset:=destpos-pchar(dest);
  164. { extend }
  165. setlength(dest,outlength+len*3);
  166. inc(outleft,len*3);
  167. inc(outlength,len*3);
  168. { string could have been moved }
  169. destpos:=pchar(dest)+outoffset;
  170. end;
  171. else
  172. runerror(231);
  173. end;
  174. end;
  175. // truncate string
  176. setlength(dest,length(dest)-outleft);
  177. end;
  178. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  179. var
  180. outlength,
  181. outoffset,
  182. outleft : size_t;
  183. srcpos,
  184. destpos: pchar;
  185. mynil : pchar;
  186. my0 : size_t;
  187. err: cint;
  188. begin
  189. mynil:=nil;
  190. my0:=0;
  191. // extra space
  192. outlength:=len+1;
  193. setlength(dest,outlength);
  194. srcpos:=source;
  195. destpos:=pchar(dest);
  196. outleft:=outlength*2;
  197. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  198. begin
  199. err:=fpgetCerrno;
  200. case err of
  201. ESysEINVAL,
  202. ESysEILSEQ:
  203. begin
  204. { skip and set to '?' }
  205. inc(srcpos);
  206. dec(len);
  207. pwidechar(destpos)^:='?';
  208. inc(destpos,2);
  209. dec(outleft,2);
  210. { reset }
  211. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  212. if err=ESysEINVAL then
  213. break;
  214. end;
  215. ESysE2BIG:
  216. begin
  217. outoffset:=destpos-pchar(dest);
  218. { extend }
  219. setlength(dest,outlength+len);
  220. inc(outleft,len*2);
  221. inc(outlength,len);
  222. { string could have been moved }
  223. destpos:=pchar(dest)+outoffset;
  224. end;
  225. else
  226. runerror(231);
  227. end;
  228. end;
  229. // truncate string
  230. setlength(dest,length(dest)-outleft div 2);
  231. end;
  232. function LowerWideString(const s : WideString) : WideString;
  233. var
  234. i : SizeInt;
  235. begin
  236. SetLength(result,length(s));
  237. for i:=1 to length(s) do
  238. result[i]:=WideChar(towlower(wint_t(s[i])));
  239. end;
  240. function UpperWideString(const s : WideString) : WideString;
  241. var
  242. i : SizeInt;
  243. begin
  244. SetLength(result,length(s));
  245. for i:=1 to length(s) do
  246. result[i]:=WideChar(towupper(wint_t(s[i])));
  247. end;
  248. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  249. var
  250. outlength,
  251. outoffset,
  252. outleft : size_t;
  253. srcpos,
  254. destpos: pchar;
  255. mynil : pchar;
  256. my0 : size_t;
  257. begin
  258. mynil:=nil;
  259. my0:=0;
  260. // extra space
  261. outlength:=len+1;
  262. setlength(dest,outlength);
  263. outlength:=len+1;
  264. srcpos:=source;
  265. destpos:=pchar(dest);
  266. outleft:=outlength*4;
  267. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  268. begin
  269. case fpgetCerrno of
  270. ESysE2BIG:
  271. begin
  272. outoffset:=destpos-pchar(dest);
  273. { extend }
  274. setlength(dest,outlength+len);
  275. inc(outleft,len*4);
  276. inc(outlength,len);
  277. { string could have been moved }
  278. destpos:=pchar(dest)+outoffset;
  279. end;
  280. else
  281. runerror(231);
  282. end;
  283. end;
  284. // truncate string
  285. setlength(dest,length(dest)-outleft div 4);
  286. end;
  287. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  288. var
  289. hs1,hs2 : UCS4String;
  290. begin
  291. hs1:=WideStringToUCS4String(s1);
  292. hs2:=WideStringToUCS4String(s2);
  293. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  294. end;
  295. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  296. begin
  297. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  298. end;
  299. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  300. begin
  301. result:=strcoll(s1,s2);
  302. end;
  303. procedure InitThread;
  304. begin
  305. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  306. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  307. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding4);
  308. iconv_ansi2ucs4:=iconv_open(unicode_encoding4,nl_langinfo(CODESET));
  309. end;
  310. procedure FiniThread;
  311. begin
  312. if (iconv_wide2ansi <> iconv_t(-1)) then
  313. iconv_close(iconv_wide2ansi);
  314. if (iconv_ansi2wide <> iconv_t(-1)) then
  315. iconv_close(iconv_ansi2wide);
  316. if (iconv_ucs42ansi <> iconv_t(-1)) then
  317. iconv_close(iconv_ucs42ansi);
  318. if (iconv_ansi2ucs4 <> iconv_t(-1)) then
  319. iconv_close(iconv_ansi2ucs4);
  320. end;
  321. Procedure SetCWideStringManager;
  322. Var
  323. CWideStringManager : TWideStringManager;
  324. begin
  325. CWideStringManager:=widestringmanager;
  326. With CWideStringManager do
  327. begin
  328. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  329. Ansi2WideMoveProc:=@Ansi2WideMove;
  330. UpperWideStringProc:=@UpperWideString;
  331. LowerWideStringProc:=@LowerWideString;
  332. CompareWideStringProc:=@CompareWideString;
  333. CompareTextWideStringProc:=@CompareTextWideString;
  334. {
  335. CharLengthPCharProc
  336. UpperAnsiStringProc
  337. LowerAnsiStringProc
  338. CompareStrAnsiStringProc
  339. CompareTextAnsiStringProc
  340. }
  341. StrCompAnsiStringProc:=@StrCompAnsi;
  342. {
  343. StrICompAnsiStringProc
  344. StrLCompAnsiStringProc
  345. StrLICompAnsiStringProc
  346. StrLowerAnsiStringProc
  347. StrUpperAnsiStringProc
  348. }
  349. ThreadInitProc:=@InitThread;
  350. ThreadFiniProc:=@FiniThread;
  351. end;
  352. SetWideStringManager(CWideStringManager);
  353. end;
  354. initialization
  355. SetCWideStringManager;
  356. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  357. { with the information from the environment variables according to POSIX }
  358. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  359. setlocale(LC_ALL,'');
  360. { init conversion tables for main program }
  361. InitThread;
  362. finalization
  363. { fini conversion tables for main program }
  364. FiniThread;
  365. end.