cwstring.pp 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$ifndef linux} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$ifndef FreeBSD5}
  20. {$ifndef SunOS}
  21. {$linklib iconv}
  22. {$define useiconv}
  23. {$endif}
  24. {$endif}
  25. {$endif linux}
  26. Uses
  27. BaseUnix,
  28. ctypes,
  29. unix,
  30. unixtype,
  31. sysutils,
  32. initc;
  33. Const
  34. {$ifndef useiconv}
  35. libiconvname='c'; // is in libc for several OSes
  36. {$else}
  37. libiconvname='iconv';
  38. {$endif}
  39. { Case-mapping "arrays" }
  40. var
  41. AnsiUpperChars: AnsiString; // 1..255
  42. AnsiLowerChars: AnsiString; // 1..255
  43. WideUpperChars: WideString; // 1..65535
  44. WideLowerChars: WideString; // 1..65535
  45. { the following declarations are from the libc unit for linux so they
  46. might be very linux centric
  47. maybe this needs to be splitted in an os depend way later }
  48. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  49. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  50. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  51. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  52. const
  53. {$ifdef linux}
  54. __LC_CTYPE = 0;
  55. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  56. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  57. CODESET = _NL_CTYPE_CODESET_NAME;
  58. {$else linux}
  59. {$ifdef darwin}
  60. CODESET = 0;
  61. {$else darwin}
  62. {$ifdef solaris}
  63. CODESET = 0;
  64. {$else solaris}
  65. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  66. CODESET = 0;
  67. {$else freebsd}
  68. {$error lookup the value of CODESET in /usr/include/langinfo.h for your OS }
  69. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  70. {$endif FreeBSD}
  71. {$endif solaris}
  72. {$endif darwin}
  73. {$endif linux}
  74. { unicode encoding name }
  75. {$ifdef FPC_LITTLE_ENDIAN}
  76. unicode_encoding = 'UNICODELITTLE';
  77. {$else FPC_LITTLE_ENDIAN}
  78. unicode_encoding = 'UNICODEBIG';
  79. {$endif FPC_LITTLE_ENDIAN}
  80. type
  81. piconv_t = ^iconv_t;
  82. iconv_t = pointer;
  83. nl_item = cint;
  84. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  85. {$ifndef Darwin}
  86. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  87. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  88. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  89. {$else}
  90. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  91. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  92. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  93. {$endif}
  94. var
  95. iconv_ansi2ucs4,
  96. iconv_ucs42ansi,
  97. iconv_ansi2wide,
  98. iconv_wide2ansi : iconv_t;
  99. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  100. var
  101. outlength,
  102. outoffset,
  103. srclen,
  104. outleft : size_t;
  105. srcpos : pwidechar;
  106. destpos: pchar;
  107. mynil : pchar;
  108. my0 : size_t;
  109. begin
  110. mynil:=nil;
  111. my0:=0;
  112. { rought estimation }
  113. setlength(dest,len*3);
  114. outlength:=len*3;
  115. srclen:=len*2;
  116. srcpos:=source;
  117. destpos:=pchar(dest);
  118. outleft:=outlength;
  119. while iconv(iconv_wide2ansi,@srcpos,@srclen,@destpos,@outleft)=size_t(-1) do
  120. begin
  121. case fpgetCerrno of
  122. ESysEILSEQ:
  123. begin
  124. { skip and set to '?' }
  125. inc(srcpos);
  126. dec(srclen,2);
  127. destpos^:='?';
  128. inc(destpos);
  129. dec(outleft);
  130. { reset }
  131. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  132. end;
  133. ESysE2BIG:
  134. begin
  135. outoffset:=destpos-pchar(dest);
  136. { extend }
  137. setlength(dest,outlength+len*3);
  138. inc(outleft,len*3);
  139. inc(outlength,len*3);
  140. { string could have been moved }
  141. destpos:=pchar(dest)+outoffset;
  142. end;
  143. else
  144. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  145. end;
  146. end;
  147. // truncate string
  148. setlength(dest,length(dest)-outleft);
  149. end;
  150. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  151. var
  152. outlength,
  153. outoffset,
  154. outleft : size_t;
  155. srcpos,
  156. destpos: pchar;
  157. mynil : pchar;
  158. my0 : size_t;
  159. begin
  160. mynil:=nil;
  161. my0:=0;
  162. // extra space
  163. outlength:=len+1;
  164. setlength(dest,outlength);
  165. outlength:=len+1;
  166. srcpos:=source;
  167. destpos:=pchar(dest);
  168. outleft:=outlength*2;
  169. while iconv(iconv_ansi2wide,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  170. begin
  171. case fpgetCerrno of
  172. ESysEILSEQ:
  173. begin
  174. { skip and set to '?' }
  175. inc(srcpos);
  176. pwidechar(destpos)^:='?';
  177. inc(destpos,2);
  178. dec(outleft,2);
  179. { reset }
  180. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  181. end;
  182. ESysE2BIG:
  183. begin
  184. outoffset:=destpos-pchar(dest);
  185. { extend }
  186. setlength(dest,outlength+len);
  187. inc(outleft,len*2);
  188. inc(outlength,len);
  189. { string could have been moved }
  190. destpos:=pchar(dest)+outoffset;
  191. end;
  192. else
  193. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  194. end;
  195. end;
  196. // truncate string
  197. setlength(dest,length(dest)-outleft div 2);
  198. end;
  199. function LowerWideString(const s : WideString) : WideString;
  200. var
  201. i : SizeInt;
  202. begin
  203. SetLength(result,length(s));
  204. for i:=1 to length(s) do
  205. result[i]:=WideChar(towlower(wint_t(s[i])));
  206. end;
  207. function UpperWideString(const s : WideString) : WideString;
  208. var
  209. i : SizeInt;
  210. begin
  211. SetLength(result,length(s));
  212. for i:=1 to length(s) do
  213. result[i]:=WideChar(towupper(wint_t(s[i])));
  214. end;
  215. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  216. var
  217. outlength,
  218. outoffset,
  219. outleft : size_t;
  220. srcpos,
  221. destpos: pchar;
  222. mynil : pchar;
  223. my0 : size_t;
  224. begin
  225. mynil:=nil;
  226. my0:=0;
  227. // extra space
  228. outlength:=len+1;
  229. setlength(dest,outlength);
  230. outlength:=len+1;
  231. srcpos:=source;
  232. destpos:=pchar(dest);
  233. outleft:=outlength*4;
  234. while iconv(iconv_ansi2ucs4,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  235. begin
  236. case fpgetCerrno of
  237. ESysE2BIG:
  238. begin
  239. outoffset:=destpos-pchar(dest);
  240. { extend }
  241. setlength(dest,outlength+len);
  242. inc(outleft,len*4);
  243. inc(outlength,len);
  244. { string could have been moved }
  245. destpos:=pchar(dest)+outoffset;
  246. end;
  247. else
  248. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  249. end;
  250. end;
  251. // truncate string
  252. setlength(dest,length(dest)-outleft div 4);
  253. end;
  254. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  255. var
  256. hs1,hs2 : UCS4String;
  257. begin
  258. hs1:=WideStringToUCS4String(s1);
  259. hs2:=WideStringToUCS4String(s2);
  260. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  261. end;
  262. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  263. begin
  264. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  265. end;
  266. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  267. begin
  268. result:=strcoll(s1,s2);
  269. end;
  270. Procedure SetCWideStringManager;
  271. Var
  272. CWideStringManager : TWideStringManager;
  273. begin
  274. CWideStringManager:=widestringmanager;
  275. With CWideStringManager do
  276. begin
  277. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  278. Ansi2WideMoveProc:=@Ansi2WideMove;
  279. UpperWideStringProc:=@UpperWideString;
  280. LowerWideStringProc:=@LowerWideString;
  281. CompareWideStringProc:=@CompareWideString;
  282. CompareTextWideStringProc:=@CompareTextWideString;
  283. {
  284. CharLengthPCharProc
  285. UpperAnsiStringProc
  286. LowerAnsiStringProc
  287. CompareStrAnsiStringProc
  288. CompareTextAnsiStringProc
  289. }
  290. StrCompAnsiStringProc:=@StrCompAnsi;
  291. {
  292. StrICompAnsiStringProc
  293. StrLCompAnsiStringProc
  294. StrLICompAnsiStringProc
  295. StrLowerAnsiStringProc
  296. StrUpperAnsiStringProc
  297. }
  298. end;
  299. SetWideStringManager(CWideStringManager);
  300. end;
  301. initialization
  302. SetCWideStringManager;
  303. { init conversion tables }
  304. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  305. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  306. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  307. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  308. finalization
  309. iconv_close(iconv_ansi2wide);
  310. end.