cwstring.pp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$ifndef linux} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$ifndef FreeBSD5}
  20. {$ifndef SunOS}
  21. {$linklib iconv}
  22. {$define useiconv}
  23. {$endif}
  24. {$endif}
  25. {$endif linux}
  26. Uses
  27. BaseUnix,
  28. ctypes,
  29. unix,
  30. unixtype,
  31. sysutils,
  32. initc;
  33. Const
  34. {$ifndef useiconv}
  35. libiconvname='c'; // is in libc for several OSes
  36. {$else}
  37. libiconvname='iconv';
  38. {$endif}
  39. { Case-mapping "arrays" }
  40. var
  41. AnsiUpperChars: AnsiString; // 1..255
  42. AnsiLowerChars: AnsiString; // 1..255
  43. WideUpperChars: WideString; // 1..65535
  44. WideLowerChars: WideString; // 1..65535
  45. { the following declarations are from the libc unit for linux so they
  46. might be very linux centric
  47. maybe this needs to be splitted in an os depend way later }
  48. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  49. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  50. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  51. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  52. const
  53. {$ifdef linux}
  54. __LC_CTYPE = 0;
  55. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  56. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  57. CODESET = _NL_CTYPE_CODESET_NAME;
  58. {$else linux}
  59. {$ifdef darwin}
  60. CODESET = 0;
  61. {$else darwin}
  62. {$ifdef solaris}
  63. CODESET = 0;
  64. {$else solaris}
  65. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  66. CODESET = 0;
  67. {$else freebsd}
  68. {$error lookup the value of CODESET in /usr/include/langinfo.h for your OS }
  69. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  70. {$endif FreeBSD}
  71. {$endif solaris}
  72. {$endif darwin}
  73. {$endif linux}
  74. { unicode encoding name }
  75. {$ifdef FPC_LITTLE_ENDIAN}
  76. unicode_encoding = 'UNICODELITTLE';
  77. {$else FPC_LITTLE_ENDIAN}
  78. unicode_encoding = 'UNICODEBIG';
  79. {$endif FPC_LITTLE_ENDIAN}
  80. type
  81. piconv_t = ^iconv_t;
  82. iconv_t = pointer;
  83. nl_item = cint;
  84. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  85. {$ifndef Darwin}
  86. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  87. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  88. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  89. {$else}
  90. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  91. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  92. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  93. {$endif}
  94. var
  95. iconv_ansi2ucs4,
  96. iconv_ucs42ansi,
  97. iconv_ansi2wide,
  98. iconv_wide2ansi : iconv_t;
  99. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  100. var
  101. outlength,
  102. outoffset,
  103. srclen,
  104. outleft : size_t;
  105. srcpos : pwidechar;
  106. destpos: pchar;
  107. mynil : pchar;
  108. my0 : size_t;
  109. iconv : iconv_t;
  110. begin
  111. { conversion descriptors aren't thread safe }
  112. if IsMultithreaded then
  113. iconv:=iconv_open(nl_langinfo(CODESET),unicode_encoding)
  114. else
  115. iconv:=iconv_wide2ansi;
  116. mynil:=nil;
  117. my0:=0;
  118. { rought estimation }
  119. setlength(dest,len*3);
  120. outlength:=len*3;
  121. srclen:=len*2;
  122. srcpos:=source;
  123. destpos:=pchar(dest);
  124. outleft:=outlength;
  125. while iconv(iconv,@srcpos,@srclen,@destpos,@outleft)=size_t(-1) do
  126. begin
  127. case fpgetCerrno of
  128. ESysEILSEQ:
  129. begin
  130. { skip and set to '?' }
  131. inc(srcpos);
  132. dec(srclen,2);
  133. destpos^:='?';
  134. inc(destpos);
  135. dec(outleft);
  136. { reset }
  137. iconv(iconv,@mynil,@my0,@mynil,@my0);
  138. end;
  139. ESysE2BIG:
  140. begin
  141. outoffset:=destpos-pchar(dest);
  142. { extend }
  143. setlength(dest,outlength+len*3);
  144. inc(outleft,len*3);
  145. inc(outlength,len*3);
  146. { string could have been moved }
  147. destpos:=pchar(dest)+outoffset;
  148. end;
  149. else
  150. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  151. end;
  152. end;
  153. // truncate string
  154. setlength(dest,length(dest)-outleft);
  155. if IsMultithreaded then
  156. iconv_close(iconv);
  157. end;
  158. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  159. var
  160. outlength,
  161. outoffset,
  162. outleft : size_t;
  163. srcpos,
  164. destpos: pchar;
  165. mynil : pchar;
  166. my0 : size_t;
  167. iconv : iconv_t;
  168. begin
  169. { conversion descriptors aren't thread safe }
  170. if IsMultithreaded then
  171. iconv:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  172. else
  173. iconv:=iconv_ansi2wide;
  174. mynil:=nil;
  175. my0:=0;
  176. // extra space
  177. outlength:=len+1;
  178. setlength(dest,outlength);
  179. outlength:=len+1;
  180. srcpos:=source;
  181. destpos:=pchar(dest);
  182. outleft:=outlength*2;
  183. while iconv(iconv,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  184. begin
  185. case fpgetCerrno of
  186. ESysEILSEQ:
  187. begin
  188. { skip and set to '?' }
  189. inc(srcpos);
  190. pwidechar(destpos)^:='?';
  191. inc(destpos,2);
  192. dec(outleft,2);
  193. { reset }
  194. iconv(iconv,@mynil,@my0,@mynil,@my0);
  195. end;
  196. ESysE2BIG:
  197. begin
  198. outoffset:=destpos-pchar(dest);
  199. { extend }
  200. setlength(dest,outlength+len);
  201. inc(outleft,len*2);
  202. inc(outlength,len);
  203. { string could have been moved }
  204. destpos:=pchar(dest)+outoffset;
  205. end;
  206. else
  207. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  208. end;
  209. end;
  210. // truncate string
  211. setlength(dest,length(dest)-outleft div 2);
  212. if IsMultithreaded then
  213. iconv_close(iconv);
  214. end;
  215. function LowerWideString(const s : WideString) : WideString;
  216. var
  217. i : SizeInt;
  218. begin
  219. SetLength(result,length(s));
  220. for i:=1 to length(s) do
  221. result[i]:=WideChar(towlower(wint_t(s[i])));
  222. end;
  223. function UpperWideString(const s : WideString) : WideString;
  224. var
  225. i : SizeInt;
  226. begin
  227. SetLength(result,length(s));
  228. for i:=1 to length(s) do
  229. result[i]:=WideChar(towupper(wint_t(s[i])));
  230. end;
  231. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  232. var
  233. outlength,
  234. outoffset,
  235. outleft : size_t;
  236. srcpos,
  237. destpos: pchar;
  238. mynil : pchar;
  239. my0 : size_t;
  240. iconv : iconv_t;
  241. begin
  242. { conversion descriptors aren't thread safe }
  243. if IsMultithreaded then
  244. iconv:=iconv_open('UCS4',nl_langinfo(CODESET));
  245. else
  246. iconv:=iconv_ansi2ucs4;
  247. mynil:=nil;
  248. my0:=0;
  249. // extra space
  250. outlength:=len+1;
  251. setlength(dest,outlength);
  252. outlength:=len+1;
  253. srcpos:=source;
  254. destpos:=pchar(dest);
  255. outleft:=outlength*4;
  256. while iconv(iconv,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  257. begin
  258. case fpgetCerrno of
  259. ESysE2BIG:
  260. begin
  261. outoffset:=destpos-pchar(dest);
  262. { extend }
  263. setlength(dest,outlength+len);
  264. inc(outleft,len*4);
  265. inc(outlength,len);
  266. { string could have been moved }
  267. destpos:=pchar(dest)+outoffset;
  268. end;
  269. else
  270. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  271. end;
  272. end;
  273. // truncate string
  274. setlength(dest,length(dest)-outleft div 4);
  275. if IsMultithreaded then
  276. iconv_close(iconv);
  277. end;
  278. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  279. var
  280. hs1,hs2 : UCS4String;
  281. begin
  282. hs1:=WideStringToUCS4String(s1);
  283. hs2:=WideStringToUCS4String(s2);
  284. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  285. end;
  286. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  287. begin
  288. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  289. end;
  290. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  291. begin
  292. result:=strcoll(s1,s2);
  293. end;
  294. Procedure SetCWideStringManager;
  295. Var
  296. CWideStringManager : TWideStringManager;
  297. begin
  298. CWideStringManager:=widestringmanager;
  299. With CWideStringManager do
  300. begin
  301. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  302. Ansi2WideMoveProc:=@Ansi2WideMove;
  303. UpperWideStringProc:=@UpperWideString;
  304. LowerWideStringProc:=@LowerWideString;
  305. CompareWideStringProc:=@CompareWideString;
  306. CompareTextWideStringProc:=@CompareTextWideString;
  307. {
  308. CharLengthPCharProc
  309. UpperAnsiStringProc
  310. LowerAnsiStringProc
  311. CompareStrAnsiStringProc
  312. CompareTextAnsiStringProc
  313. }
  314. StrCompAnsiStringProc:=@StrCompAnsi;
  315. {
  316. StrICompAnsiStringProc
  317. StrLCompAnsiStringProc
  318. StrLICompAnsiStringProc
  319. StrLowerAnsiStringProc
  320. StrUpperAnsiStringProc
  321. }
  322. end;
  323. SetWideStringManager(CWideStringManager);
  324. end;
  325. initialization
  326. SetCWideStringManager;
  327. { init conversion tables }
  328. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  329. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  330. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  331. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  332. finalization
  333. iconv_close(iconv_ansi2wide);
  334. end.