cwstring.pp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$define useiconv}
  21. {$endif linux}
  22. Uses
  23. BaseUnix,
  24. ctypes,
  25. unix,
  26. unixtype,
  27. initc;
  28. Const
  29. {$ifndef useiconv}
  30. libiconvname='c'; // is in libc under Linux.
  31. {$else}
  32. libiconvname='iconv';
  33. {$endif}
  34. { helper functions from libc }
  35. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  36. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  37. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  38. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  39. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  40. const
  41. {$ifdef linux}
  42. __LC_CTYPE = 0;
  43. LC_ALL = 6;
  44. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  45. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  46. CODESET = _NL_CTYPE_CODESET_NAME;
  47. {$else linux}
  48. {$ifdef darwin}
  49. CODESET = 0;
  50. LC_ALL = 0;
  51. {$else darwin}
  52. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  53. __LC_CTYPE = 0;
  54. LC_ALL = 0;
  55. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  56. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  57. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  58. {$else freebsd}
  59. {$ifdef solaris}
  60. CODESET=49;
  61. LC_ALL = 6;
  62. {$else solaris}
  63. {$ifdef beos}
  64. {$warning check correct value for BeOS}
  65. CODESET=49;
  66. LC_ALL = 6; // Checked for BeOS, but 0 under Haiku...
  67. ESysEILSEQ = EILSEQ;
  68. {$else}
  69. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  70. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  71. {$endif beos}
  72. {$endif solaris}
  73. {$endif FreeBSD}
  74. {$endif darwin}
  75. {$endif linux}
  76. { unicode encoding name }
  77. {$ifdef FPC_LITTLE_ENDIAN}
  78. unicode_encoding2 = 'UTF-16LE';
  79. unicode_encoding4 = 'UCS-4LE';
  80. {$else FPC_LITTLE_ENDIAN}
  81. unicode_encoding2 = 'UTF-16BE';
  82. unicode_encoding4 = 'UCS-4BE';
  83. {$endif FPC_LITTLE_ENDIAN}
  84. type
  85. piconv_t = ^iconv_t;
  86. iconv_t = pointer;
  87. nl_item = cint;
  88. {$ifndef beos}
  89. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  90. {$endif}
  91. { $ ifndef bsd}
  92. {$if not defined(bsd) and not defined(beos)}
  93. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  94. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  95. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  96. {$else}
  97. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  98. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  99. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  100. {$endif}
  101. threadvar
  102. iconv_ansi2ucs4,
  103. iconv_ucs42ansi,
  104. iconv_ansi2wide,
  105. iconv_wide2ansi : iconv_t;
  106. {$ifdef beos}
  107. function nl_langinfo(__item:nl_item):pchar;
  108. begin
  109. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  110. Result := '';
  111. end;
  112. {$endif}
  113. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  114. var
  115. outlength,
  116. outoffset,
  117. srclen,
  118. outleft : size_t;
  119. srcpos : pwidechar;
  120. destpos: pchar;
  121. mynil : pchar;
  122. my0 : size_t;
  123. begin
  124. mynil:=nil;
  125. my0:=0;
  126. { rought estimation }
  127. setlength(dest,len*3);
  128. outlength:=len*3;
  129. srclen:=len*2;
  130. srcpos:=source;
  131. destpos:=pchar(dest);
  132. outleft:=outlength;
  133. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  134. begin
  135. case fpgetCerrno of
  136. ESysEILSEQ:
  137. begin
  138. { skip and set to '?' }
  139. inc(srcpos);
  140. dec(srclen,2);
  141. destpos^:='?';
  142. inc(destpos);
  143. dec(outleft);
  144. { reset }
  145. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  146. end;
  147. ESysE2BIG:
  148. begin
  149. outoffset:=destpos-pchar(dest);
  150. { extend }
  151. setlength(dest,outlength+len*3);
  152. inc(outleft,len*3);
  153. inc(outlength,len*3);
  154. { string could have been moved }
  155. destpos:=pchar(dest)+outoffset;
  156. end;
  157. else
  158. runerror(231);
  159. end;
  160. end;
  161. // truncate string
  162. setlength(dest,length(dest)-outleft);
  163. end;
  164. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  165. var
  166. outlength,
  167. outoffset,
  168. outleft : size_t;
  169. srcpos,
  170. destpos: pchar;
  171. mynil : pchar;
  172. my0 : size_t;
  173. begin
  174. mynil:=nil;
  175. my0:=0;
  176. // extra space
  177. outlength:=len+1;
  178. setlength(dest,outlength);
  179. outlength:=len+1;
  180. srcpos:=source;
  181. destpos:=pchar(dest);
  182. outleft:=outlength*2;
  183. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  184. begin
  185. case fpgetCerrno of
  186. ESysEILSEQ:
  187. begin
  188. { skip and set to '?' }
  189. inc(srcpos);
  190. dec(len);
  191. pwidechar(destpos)^:='?';
  192. inc(destpos,2);
  193. dec(outleft,2);
  194. { reset }
  195. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  196. end;
  197. ESysE2BIG:
  198. begin
  199. outoffset:=destpos-pchar(dest);
  200. { extend }
  201. setlength(dest,outlength+len);
  202. inc(outleft,len*2);
  203. inc(outlength,len);
  204. { string could have been moved }
  205. destpos:=pchar(dest)+outoffset;
  206. end;
  207. else
  208. runerror(231);
  209. end;
  210. end;
  211. // truncate string
  212. setlength(dest,length(dest)-outleft div 2);
  213. end;
  214. function LowerWideString(const s : WideString) : WideString;
  215. var
  216. i : SizeInt;
  217. begin
  218. SetLength(result,length(s));
  219. for i:=1 to length(s) do
  220. result[i]:=WideChar(towlower(wint_t(s[i])));
  221. end;
  222. function UpperWideString(const s : WideString) : WideString;
  223. var
  224. i : SizeInt;
  225. begin
  226. SetLength(result,length(s));
  227. for i:=1 to length(s) do
  228. result[i]:=WideChar(towupper(wint_t(s[i])));
  229. end;
  230. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  231. var
  232. outlength,
  233. outoffset,
  234. outleft : size_t;
  235. srcpos,
  236. destpos: pchar;
  237. mynil : pchar;
  238. my0 : size_t;
  239. begin
  240. mynil:=nil;
  241. my0:=0;
  242. // extra space
  243. outlength:=len+1;
  244. setlength(dest,outlength);
  245. outlength:=len+1;
  246. srcpos:=source;
  247. destpos:=pchar(dest);
  248. outleft:=outlength*4;
  249. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  250. begin
  251. case fpgetCerrno of
  252. ESysE2BIG:
  253. begin
  254. outoffset:=destpos-pchar(dest);
  255. { extend }
  256. setlength(dest,outlength+len);
  257. inc(outleft,len*4);
  258. inc(outlength,len);
  259. { string could have been moved }
  260. destpos:=pchar(dest)+outoffset;
  261. end;
  262. else
  263. runerror(231);
  264. end;
  265. end;
  266. // truncate string
  267. setlength(dest,length(dest)-outleft div 4);
  268. end;
  269. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  270. var
  271. hs1,hs2 : UCS4String;
  272. begin
  273. hs1:=WideStringToUCS4String(s1);
  274. hs2:=WideStringToUCS4String(s2);
  275. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  276. end;
  277. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  278. begin
  279. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  280. end;
  281. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  282. begin
  283. result:=strcoll(s1,s2);
  284. end;
  285. procedure InitThread;
  286. begin
  287. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  288. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  289. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding4);
  290. iconv_ansi2ucs4:=iconv_open(unicode_encoding4,nl_langinfo(CODESET));
  291. end;
  292. procedure FiniThread;
  293. begin
  294. if (iconv_wide2ansi <> iconv_t(-1)) then
  295. iconv_close(iconv_wide2ansi);
  296. if (iconv_ansi2wide <> iconv_t(-1)) then
  297. iconv_close(iconv_ansi2wide);
  298. if (iconv_ucs42ansi <> iconv_t(-1)) then
  299. iconv_close(iconv_ucs42ansi);
  300. if (iconv_ansi2ucs4 <> iconv_t(-1)) then
  301. iconv_close(iconv_ansi2ucs4);
  302. end;
  303. Procedure SetCWideStringManager;
  304. Var
  305. CWideStringManager : TWideStringManager;
  306. begin
  307. CWideStringManager:=widestringmanager;
  308. With CWideStringManager do
  309. begin
  310. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  311. Ansi2WideMoveProc:=@Ansi2WideMove;
  312. UpperWideStringProc:=@UpperWideString;
  313. LowerWideStringProc:=@LowerWideString;
  314. CompareWideStringProc:=@CompareWideString;
  315. CompareTextWideStringProc:=@CompareTextWideString;
  316. {
  317. CharLengthPCharProc
  318. UpperAnsiStringProc
  319. LowerAnsiStringProc
  320. CompareStrAnsiStringProc
  321. CompareTextAnsiStringProc
  322. }
  323. StrCompAnsiStringProc:=@StrCompAnsi;
  324. {
  325. StrICompAnsiStringProc
  326. StrLCompAnsiStringProc
  327. StrLICompAnsiStringProc
  328. StrLowerAnsiStringProc
  329. StrUpperAnsiStringProc
  330. }
  331. ThreadInitProc:=@InitThread;
  332. ThreadFiniProc:=@FiniThread;
  333. end;
  334. SetWideStringManager(CWideStringManager);
  335. end;
  336. initialization
  337. SetCWideStringManager;
  338. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  339. { with the information from the environment variables according to POSIX }
  340. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  341. setlocale(LC_ALL,'');
  342. { init conversion tables for main program }
  343. InitThread;
  344. finalization
  345. { fini conversion tables for main program }
  346. FiniThread;
  347. end.