cwstring.pp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$define useiconv}
  21. {$endif linux}
  22. Uses
  23. BaseUnix,
  24. ctypes,
  25. unix,
  26. unixtype,
  27. initc;
  28. Const
  29. {$ifndef useiconv}
  30. libiconvname='c'; // is in libc under Linux.
  31. {$else}
  32. libiconvname='iconv';
  33. {$endif}
  34. { helper functions from libc }
  35. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  36. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  37. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  38. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  39. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  40. const
  41. {$ifdef linux}
  42. __LC_CTYPE = 0;
  43. LC_ALL = 6;
  44. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  45. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  46. CODESET = _NL_CTYPE_CODESET_NAME;
  47. {$else linux}
  48. {$ifdef darwin}
  49. CODESET = 0;
  50. LC_ALL = 0;
  51. {$else darwin}
  52. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  53. __LC_CTYPE = 0;
  54. LC_ALL = 0;
  55. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  56. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  57. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  58. {$else freebsd}
  59. {$ifdef solaris}
  60. CODESET=49;
  61. LC_ALL = 6;
  62. {$else}
  63. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  64. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  65. {$endif solaris}
  66. {$endif FreeBSD}
  67. {$endif darwin}
  68. {$endif linux}
  69. { unicode encoding name }
  70. {$ifdef FPC_LITTLE_ENDIAN}
  71. unicode_encoding2 = 'UTF-16LE';
  72. unicode_encoding4 = 'UCS-4LE';
  73. {$else FPC_LITTLE_ENDIAN}
  74. unicode_encoding2 = 'UTF-16BE';
  75. unicode_encoding4 = 'UCS-4BE';
  76. {$endif FPC_LITTLE_ENDIAN}
  77. type
  78. piconv_t = ^iconv_t;
  79. iconv_t = pointer;
  80. nl_item = cint;
  81. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  82. {$ifndef bsd}
  83. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  84. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  85. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  86. {$else}
  87. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  88. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  89. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  90. {$endif}
  91. threadvar
  92. iconv_ansi2ucs4,
  93. iconv_ucs42ansi,
  94. iconv_ansi2wide,
  95. iconv_wide2ansi : iconv_t;
  96. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  97. var
  98. outlength,
  99. outoffset,
  100. srclen,
  101. outleft : size_t;
  102. srcpos : pwidechar;
  103. destpos: pchar;
  104. mynil : pchar;
  105. my0 : size_t;
  106. err: cint;
  107. begin
  108. mynil:=nil;
  109. my0:=0;
  110. { rought estimation }
  111. setlength(dest,len*3);
  112. outlength:=len*3;
  113. srclen:=len*2;
  114. srcpos:=source;
  115. destpos:=pchar(dest);
  116. outleft:=outlength;
  117. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  118. begin
  119. err:=fpgetCerrno;
  120. case err of
  121. { last character is incomplete sequence }
  122. ESysEINVAL,
  123. { incomplete sequence in the middle }
  124. ESysEILSEQ:
  125. begin
  126. { skip and set to '?' }
  127. inc(srcpos);
  128. dec(srclen,2);
  129. destpos^:='?';
  130. inc(destpos);
  131. dec(outleft);
  132. { reset }
  133. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  134. if err=ESysEINVAL then
  135. break;
  136. end;
  137. ESysE2BIG:
  138. begin
  139. outoffset:=destpos-pchar(dest);
  140. { extend }
  141. setlength(dest,outlength+len*3);
  142. inc(outleft,len*3);
  143. inc(outlength,len*3);
  144. { string could have been moved }
  145. destpos:=pchar(dest)+outoffset;
  146. end;
  147. else
  148. runerror(231);
  149. end;
  150. end;
  151. // truncate string
  152. setlength(dest,length(dest)-outleft);
  153. end;
  154. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  155. var
  156. outlength,
  157. outoffset,
  158. outleft : size_t;
  159. srcpos,
  160. destpos: pchar;
  161. mynil : pchar;
  162. my0 : size_t;
  163. err: cint;
  164. begin
  165. mynil:=nil;
  166. my0:=0;
  167. // extra space
  168. outlength:=len+1;
  169. setlength(dest,outlength);
  170. srcpos:=source;
  171. destpos:=pchar(dest);
  172. outleft:=outlength*2;
  173. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  174. begin
  175. err:=fpgetCerrno;
  176. case err of
  177. ESysEINVAL,
  178. ESysEILSEQ:
  179. begin
  180. { skip and set to '?' }
  181. inc(srcpos);
  182. dec(len);
  183. pwidechar(destpos)^:='?';
  184. inc(destpos,2);
  185. dec(outleft,2);
  186. { reset }
  187. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  188. if err=ESysEINVAL then
  189. break;
  190. end;
  191. ESysE2BIG:
  192. begin
  193. outoffset:=destpos-pchar(dest);
  194. { extend }
  195. setlength(dest,outlength+len);
  196. inc(outleft,len*2);
  197. inc(outlength,len);
  198. { string could have been moved }
  199. destpos:=pchar(dest)+outoffset;
  200. end;
  201. else
  202. runerror(231);
  203. end;
  204. end;
  205. // truncate string
  206. setlength(dest,length(dest)-outleft div 2);
  207. end;
  208. function LowerWideString(const s : WideString) : WideString;
  209. var
  210. i : SizeInt;
  211. begin
  212. SetLength(result,length(s));
  213. for i:=1 to length(s) do
  214. result[i]:=WideChar(towlower(wint_t(s[i])));
  215. end;
  216. function UpperWideString(const s : WideString) : WideString;
  217. var
  218. i : SizeInt;
  219. begin
  220. SetLength(result,length(s));
  221. for i:=1 to length(s) do
  222. result[i]:=WideChar(towupper(wint_t(s[i])));
  223. end;
  224. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  225. var
  226. outlength,
  227. outoffset,
  228. outleft : size_t;
  229. srcpos,
  230. destpos: pchar;
  231. mynil : pchar;
  232. my0 : size_t;
  233. begin
  234. mynil:=nil;
  235. my0:=0;
  236. // extra space
  237. outlength:=len+1;
  238. setlength(dest,outlength);
  239. outlength:=len+1;
  240. srcpos:=source;
  241. destpos:=pchar(dest);
  242. outleft:=outlength*4;
  243. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  244. begin
  245. case fpgetCerrno of
  246. ESysE2BIG:
  247. begin
  248. outoffset:=destpos-pchar(dest);
  249. { extend }
  250. setlength(dest,outlength+len);
  251. inc(outleft,len*4);
  252. inc(outlength,len);
  253. { string could have been moved }
  254. destpos:=pchar(dest)+outoffset;
  255. end;
  256. else
  257. runerror(231);
  258. end;
  259. end;
  260. // truncate string
  261. setlength(dest,length(dest)-outleft div 4);
  262. end;
  263. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  264. var
  265. hs1,hs2 : UCS4String;
  266. begin
  267. hs1:=WideStringToUCS4String(s1);
  268. hs2:=WideStringToUCS4String(s2);
  269. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  270. end;
  271. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  272. begin
  273. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  274. end;
  275. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  276. begin
  277. result:=strcoll(s1,s2);
  278. end;
  279. procedure InitThread;
  280. begin
  281. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
  282. iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
  283. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding4);
  284. iconv_ansi2ucs4:=iconv_open(unicode_encoding4,nl_langinfo(CODESET));
  285. end;
  286. procedure FiniThread;
  287. begin
  288. if (iconv_wide2ansi <> iconv_t(-1)) then
  289. iconv_close(iconv_wide2ansi);
  290. if (iconv_ansi2wide <> iconv_t(-1)) then
  291. iconv_close(iconv_ansi2wide);
  292. if (iconv_ucs42ansi <> iconv_t(-1)) then
  293. iconv_close(iconv_ucs42ansi);
  294. if (iconv_ansi2ucs4 <> iconv_t(-1)) then
  295. iconv_close(iconv_ansi2ucs4);
  296. end;
  297. Procedure SetCWideStringManager;
  298. Var
  299. CWideStringManager : TWideStringManager;
  300. begin
  301. CWideStringManager:=widestringmanager;
  302. With CWideStringManager do
  303. begin
  304. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  305. Ansi2WideMoveProc:=@Ansi2WideMove;
  306. UpperWideStringProc:=@UpperWideString;
  307. LowerWideStringProc:=@LowerWideString;
  308. CompareWideStringProc:=@CompareWideString;
  309. CompareTextWideStringProc:=@CompareTextWideString;
  310. {
  311. CharLengthPCharProc
  312. UpperAnsiStringProc
  313. LowerAnsiStringProc
  314. CompareStrAnsiStringProc
  315. CompareTextAnsiStringProc
  316. }
  317. StrCompAnsiStringProc:=@StrCompAnsi;
  318. {
  319. StrICompAnsiStringProc
  320. StrLCompAnsiStringProc
  321. StrLICompAnsiStringProc
  322. StrLowerAnsiStringProc
  323. StrUpperAnsiStringProc
  324. }
  325. ThreadInitProc:=@InitThread;
  326. ThreadFiniProc:=@FiniThread;
  327. end;
  328. SetWideStringManager(CWideStringManager);
  329. end;
  330. initialization
  331. SetCWideStringManager;
  332. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  333. { with the information from the environment variables according to POSIX }
  334. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  335. setlocale(LC_ALL,'');
  336. { init conversion tables for main program }
  337. InitThread;
  338. finalization
  339. { fini conversion tables for main program }
  340. FiniThread;
  341. end.