cwstring.pp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$ifndef linux} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$ifndef FreeBSD5}
  20. {$linklib iconv}
  21. {$define useiconv}
  22. {$endif}
  23. {$endif linux}
  24. Uses
  25. BaseUnix,
  26. ctypes,
  27. unix,
  28. unixtype,
  29. sysutils,
  30. initc;
  31. Const
  32. {$ifndef useiconv}
  33. libiconvname='c'; // is in libc under Linux.
  34. {$else}
  35. libiconvname='iconv';
  36. {$endif}
  37. { Case-mapping "arrays" }
  38. var
  39. AnsiUpperChars: AnsiString; // 1..255
  40. AnsiLowerChars: AnsiString; // 1..255
  41. WideUpperChars: WideString; // 1..65535
  42. WideLowerChars: WideString; // 1..65535
  43. { the following declarations are from the libc unit for linux so they
  44. might be very linux centric
  45. maybe this needs to be splitted in an os depend way later }
  46. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  47. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  48. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  49. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  50. const
  51. {$ifdef linux}
  52. __LC_CTYPE = 0;
  53. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  54. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  55. CODESET = _NL_CTYPE_CODESET_NAME;
  56. {$else linux}
  57. {$ifdef darwin}
  58. CODESET = 0;
  59. {$else darwin}
  60. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  61. CODESET = 0;
  62. {$else freebsd}
  63. {$error lookup the value of CODESET in /usr/include/langinfo.h for your OS }
  64. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  65. {$endif FreeBSD}
  66. {$endif darwin}
  67. {$endif linux}
  68. { unicode encoding name }
  69. {$ifdef FPC_LITTLE_ENDIAN}
  70. unicode_encoding = 'UNICODELITTLE';
  71. {$else FPC_LITTLE_ENDIAN}
  72. unicode_encoding = 'UNICODEBIG';
  73. {$endif FPC_LITTLE_ENDIAN}
  74. type
  75. piconv_t = ^iconv_t;
  76. iconv_t = pointer;
  77. nl_item = cint;
  78. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  79. {$ifndef Darwin}
  80. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  81. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  82. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  83. {$else}
  84. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  85. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  86. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  87. {$endif}
  88. var
  89. iconv_ansi2ucs4,
  90. iconv_ucs42ansi,
  91. iconv_ansi2wide,
  92. iconv_wide2ansi : iconv_t;
  93. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  94. var
  95. outlength,
  96. outoffset,
  97. srclen,
  98. outleft : size_t;
  99. srcpos : pwidechar;
  100. destpos: pchar;
  101. mynil : pchar;
  102. my0 : size_t;
  103. conv : iconv_t;
  104. begin
  105. { conversion descriptors aren't thread safe }
  106. if IsMultithread then
  107. conv:=iconv_open(nl_langinfo(CODESET),unicode_encoding)
  108. else
  109. conv:=iconv_wide2ansi;
  110. mynil:=nil;
  111. my0:=0;
  112. { rought estimation }
  113. setlength(dest,len*3);
  114. outlength:=len*3;
  115. srclen:=len*2;
  116. srcpos:=source;
  117. destpos:=pchar(dest);
  118. outleft:=outlength;
  119. while iconv(conv,@srcpos,@srclen,@destpos,@outleft)=size_t(-1) do
  120. begin
  121. case fpgetCerrno of
  122. ESysEILSEQ:
  123. begin
  124. { skip and set to '?' }
  125. inc(srcpos);
  126. dec(srclen,2);
  127. destpos^:='?';
  128. inc(destpos);
  129. dec(outleft);
  130. { reset }
  131. iconv(conv,@mynil,@my0,@mynil,@my0);
  132. end;
  133. ESysE2BIG:
  134. begin
  135. outoffset:=destpos-pchar(dest);
  136. { extend }
  137. setlength(dest,outlength+len*3);
  138. inc(outleft,len*3);
  139. inc(outlength,len*3);
  140. { string could have been moved }
  141. destpos:=pchar(dest)+outoffset;
  142. end;
  143. else
  144. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  145. end;
  146. end;
  147. // truncate string
  148. setlength(dest,length(dest)-outleft);
  149. if IsMultithread then
  150. iconv_close(conv);
  151. end;
  152. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  153. var
  154. outlength,
  155. outoffset,
  156. outleft : size_t;
  157. srcpos,
  158. destpos: pchar;
  159. mynil : pchar;
  160. my0 : size_t;
  161. conv : iconv_t;
  162. begin
  163. { conversion descriptors aren't thread safe }
  164. if IsMultithread then
  165. conv:=iconv_open(unicode_encoding,nl_langinfo(CODESET))
  166. else
  167. conv:=iconv_ansi2wide;
  168. mynil:=nil;
  169. my0:=0;
  170. // extra space
  171. outlength:=len+1;
  172. setlength(dest,outlength);
  173. outlength:=len+1;
  174. srcpos:=source;
  175. destpos:=pchar(dest);
  176. outleft:=outlength*2;
  177. while iconv(conv,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  178. begin
  179. case fpgetCerrno of
  180. ESysEILSEQ:
  181. begin
  182. { skip and set to '?' }
  183. inc(srcpos);
  184. pwidechar(destpos)^:='?';
  185. inc(destpos,2);
  186. dec(outleft,2);
  187. { reset }
  188. iconv(conv,@mynil,@my0,@mynil,@my0);
  189. end;
  190. ESysE2BIG:
  191. begin
  192. outoffset:=destpos-pchar(dest);
  193. { extend }
  194. setlength(dest,outlength+len);
  195. inc(outleft,len*2);
  196. inc(outlength,len);
  197. { string could have been moved }
  198. destpos:=pchar(dest)+outoffset;
  199. end;
  200. else
  201. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  202. end;
  203. end;
  204. // truncate string
  205. setlength(dest,length(dest)-outleft div 2);
  206. if IsMultithread then
  207. iconv_close(conv);
  208. end;
  209. function LowerWideString(const s : WideString) : WideString;
  210. var
  211. i : SizeInt;
  212. begin
  213. SetLength(result,length(s));
  214. for i:=1 to length(s) do
  215. result[i]:=WideChar(towlower(wint_t(s[i])));
  216. end;
  217. function UpperWideString(const s : WideString) : WideString;
  218. var
  219. i : SizeInt;
  220. begin
  221. SetLength(result,length(s));
  222. for i:=1 to length(s) do
  223. result[i]:=WideChar(towupper(wint_t(s[i])));
  224. end;
  225. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  226. var
  227. outlength,
  228. outoffset,
  229. outleft : size_t;
  230. srcpos,
  231. destpos: pchar;
  232. mynil : pchar;
  233. my0 : size_t;
  234. conv : iconv_t;
  235. begin
  236. { conversion descriptors aren't thread safe }
  237. if IsMultithread then
  238. conv:=iconv_open('UCS4',nl_langinfo(CODESET))
  239. else
  240. conv:=iconv_ansi2ucs4;
  241. mynil:=nil;
  242. my0:=0;
  243. // extra space
  244. outlength:=len+1;
  245. setlength(dest,outlength);
  246. outlength:=len+1;
  247. srcpos:=source;
  248. destpos:=pchar(dest);
  249. outleft:=outlength*4;
  250. while iconv(conv,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  251. begin
  252. case fpgetCerrno of
  253. ESysE2BIG:
  254. begin
  255. outoffset:=destpos-pchar(dest);
  256. { extend }
  257. setlength(dest,outlength+len);
  258. inc(outleft,len*4);
  259. inc(outlength,len);
  260. { string could have been moved }
  261. destpos:=pchar(dest)+outoffset;
  262. end;
  263. else
  264. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  265. end;
  266. end;
  267. // truncate string
  268. setlength(dest,length(dest)-outleft div 4);
  269. if IsMultithread then
  270. iconv_close(conv);
  271. end;
  272. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  273. var
  274. hs1,hs2 : UCS4String;
  275. begin
  276. hs1:=WideStringToUCS4String(s1);
  277. hs2:=WideStringToUCS4String(s2);
  278. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  279. end;
  280. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  281. begin
  282. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  283. end;
  284. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  285. begin
  286. result:=strcoll(s1,s2);
  287. end;
  288. Procedure SetCWideStringManager;
  289. Var
  290. CWideStringManager : TWideStringManager;
  291. begin
  292. CWideStringManager:=widestringmanager;
  293. With CWideStringManager do
  294. begin
  295. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  296. Ansi2WideMoveProc:=@Ansi2WideMove;
  297. UpperWideStringProc:=@UpperWideString;
  298. LowerWideStringProc:=@LowerWideString;
  299. CompareWideStringProc:=@CompareWideString;
  300. CompareTextWideStringProc:=@CompareTextWideString;
  301. {
  302. CharLengthPCharProc
  303. UpperAnsiStringProc
  304. LowerAnsiStringProc
  305. CompareStrAnsiStringProc
  306. CompareTextAnsiStringProc
  307. }
  308. StrCompAnsiStringProc:=@StrCompAnsi;
  309. {
  310. StrICompAnsiStringProc
  311. StrLCompAnsiStringProc
  312. StrLICompAnsiStringProc
  313. StrLowerAnsiStringProc
  314. StrUpperAnsiStringProc
  315. }
  316. end;
  317. SetWideStringManager(CWideStringManager);
  318. end;
  319. initialization
  320. SetCWideStringManager;
  321. { init conversion tables }
  322. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  323. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  324. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  325. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  326. finalization
  327. iconv_close(iconv_ansi2wide);
  328. end.