cwstring.pp 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$ifndef linux} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$endif linux}
  21. Uses
  22. BaseUnix,
  23. ctypes,
  24. unix,
  25. unixtype,
  26. sysutils,
  27. initc;
  28. Const
  29. {$ifdef Linux}
  30. libiconvname='c'; // is in libc under Linux.
  31. {$else}
  32. libiconvname='iconv';
  33. {$endif}
  34. { Case-mapping "arrays" }
  35. var
  36. AnsiUpperChars: AnsiString; // 1..255
  37. AnsiLowerChars: AnsiString; // 1..255
  38. WideUpperChars: WideString; // 1..65535
  39. WideLowerChars: WideString; // 1..65535
  40. { the following declarations are from the libc unit for linux so they
  41. might be very linux centric
  42. maybe this needs to be splitted in an os depend way later }
  43. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  44. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  45. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  46. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  47. const
  48. {$ifdef linux}
  49. __LC_CTYPE = 0;
  50. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  51. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  52. CODESET = _NL_CTYPE_CODESET_NAME;
  53. {$else linux}
  54. {$ifdef darwin}
  55. CODESET = 0;
  56. {$else darwin}
  57. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  58. CODESET = 0;
  59. {$else freebsd}
  60. {$error lookup the value of CODESET in /usr/include/langinfo.h for your OS }
  61. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  62. {$endif FreeBSD}
  63. {$endif darwin}
  64. {$endif linux}
  65. { unicode encoding name }
  66. {$ifdef FPC_LITTLE_ENDIAN}
  67. unicode_encoding = 'UNICODELITTLE';
  68. {$else FPC_LITTLE_ENDIAN}
  69. unicode_encoding = 'UNICODEBIG';
  70. {$endif FPC_LITTLE_ENDIAN}
  71. type
  72. piconv_t = ^iconv_t;
  73. iconv_t = pointer;
  74. nl_item = cint;
  75. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  76. {$ifndef Darwin}
  77. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  78. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  79. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  80. {$else}
  81. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  82. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  83. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  84. {$endif}
  85. var
  86. iconv_ansi2ucs4,
  87. iconv_ucs42ansi,
  88. iconv_ansi2wide,
  89. iconv_wide2ansi : iconv_t;
  90. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  91. var
  92. outlength,
  93. outoffset,
  94. srclen,
  95. outleft : size_t;
  96. srcpos : pwidechar;
  97. destpos: pchar;
  98. mynil : pchar;
  99. my0 : size_t;
  100. begin
  101. mynil:=nil;
  102. my0:=0;
  103. { rought estimation }
  104. setlength(dest,len*3);
  105. outlength:=len*3;
  106. srclen:=len*2;
  107. srcpos:=source;
  108. destpos:=pchar(dest);
  109. outleft:=outlength;
  110. while iconv(iconv_wide2ansi,@srcpos,@srclen,@destpos,@outleft)=size_t(-1) do
  111. begin
  112. case fpgetCerrno of
  113. ESysEILSEQ:
  114. begin
  115. { skip and set to '?' }
  116. inc(srcpos);
  117. dec(srclen,2);
  118. destpos^:='?';
  119. inc(destpos);
  120. dec(outleft);
  121. { reset }
  122. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  123. end;
  124. ESysE2BIG:
  125. begin
  126. outoffset:=destpos-pchar(dest);
  127. { extend }
  128. setlength(dest,outlength+len*3);
  129. inc(outleft,len*3);
  130. inc(outlength,len*3);
  131. { string could have been moved }
  132. destpos:=pchar(dest)+outoffset;
  133. end;
  134. else
  135. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  136. end;
  137. end;
  138. // truncate string
  139. setlength(dest,length(dest)-outleft);
  140. end;
  141. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  142. var
  143. outlength,
  144. outoffset,
  145. outleft : size_t;
  146. srcpos,
  147. destpos: pchar;
  148. mynil : pchar;
  149. my0 : size_t;
  150. begin
  151. mynil:=nil;
  152. my0:=0;
  153. // extra space
  154. outlength:=len+1;
  155. setlength(dest,outlength);
  156. outlength:=len+1;
  157. srcpos:=source;
  158. destpos:=pchar(dest);
  159. outleft:=outlength*2;
  160. while iconv(iconv_ansi2wide,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  161. begin
  162. case fpgetCerrno of
  163. ESysEILSEQ:
  164. begin
  165. { skip and set to '?' }
  166. inc(srcpos);
  167. pwidechar(destpos)^:='?';
  168. inc(destpos,2);
  169. dec(outleft,2);
  170. { reset }
  171. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  172. end;
  173. ESysE2BIG:
  174. begin
  175. outoffset:=destpos-pchar(dest);
  176. { extend }
  177. setlength(dest,outlength+len);
  178. inc(outleft,len*2);
  179. inc(outlength,len);
  180. { string could have been moved }
  181. destpos:=pchar(dest)+outoffset;
  182. end;
  183. else
  184. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  185. end;
  186. end;
  187. // truncate string
  188. setlength(dest,length(dest)-outleft div 2);
  189. end;
  190. function LowerWideString(const s : WideString) : WideString;
  191. var
  192. i : SizeInt;
  193. begin
  194. SetLength(result,length(s));
  195. for i:=1 to length(s) do
  196. result[i]:=WideChar(towlower(wint_t(s[i])));
  197. end;
  198. function UpperWideString(const s : WideString) : WideString;
  199. var
  200. i : SizeInt;
  201. begin
  202. SetLength(result,length(s));
  203. for i:=1 to length(s) do
  204. result[i]:=WideChar(towupper(wint_t(s[i])));
  205. end;
  206. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  207. var
  208. outlength,
  209. outoffset,
  210. outleft : size_t;
  211. srcpos,
  212. destpos: pchar;
  213. mynil : pchar;
  214. my0 : size_t;
  215. begin
  216. mynil:=nil;
  217. my0:=0;
  218. // extra space
  219. outlength:=len+1;
  220. setlength(dest,outlength);
  221. outlength:=len+1;
  222. srcpos:=source;
  223. destpos:=pchar(dest);
  224. outleft:=outlength*4;
  225. while iconv(iconv_ansi2ucs4,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  226. begin
  227. case fpgetCerrno of
  228. ESysE2BIG:
  229. begin
  230. outoffset:=destpos-pchar(dest);
  231. { extend }
  232. setlength(dest,outlength+len);
  233. inc(outleft,len*4);
  234. inc(outlength,len);
  235. { string could have been moved }
  236. destpos:=pchar(dest)+outoffset;
  237. end;
  238. else
  239. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  240. end;
  241. end;
  242. // truncate string
  243. setlength(dest,length(dest)-outleft div 4);
  244. end;
  245. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  246. var
  247. hs1,hs2 : UCS4String;
  248. begin
  249. hs1:=WideStringToUCS4String(s1);
  250. hs2:=WideStringToUCS4String(s2);
  251. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  252. end;
  253. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  254. begin
  255. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  256. end;
  257. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  258. begin
  259. result:=strcoll(s1,s2);
  260. end;
  261. Procedure SetCWideStringManager;
  262. Var
  263. CWideStringManager : TWideStringManager;
  264. begin
  265. CWideStringManager:=widestringmanager;
  266. With CWideStringManager do
  267. begin
  268. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  269. Ansi2WideMoveProc:=@Ansi2WideMove;
  270. UpperWideStringProc:=@UpperWideString;
  271. LowerWideStringProc:=@LowerWideString;
  272. CompareWideStringProc:=@CompareWideString;
  273. CompareTextWideStringProc:=@CompareTextWideString;
  274. {
  275. CharLengthPCharProc
  276. UpperAnsiStringProc
  277. LowerAnsiStringProc
  278. CompareStrAnsiStringProc
  279. CompareTextAnsiStringProc
  280. }
  281. StrCompAnsiStringProc:=@StrCompAnsi;
  282. {
  283. StrICompAnsiStringProc
  284. StrLCompAnsiStringProc
  285. StrLICompAnsiStringProc
  286. StrLowerAnsiStringProc
  287. StrUpperAnsiStringProc
  288. }
  289. end;
  290. SetWideStringManager(CWideStringManager);
  291. end;
  292. initialization
  293. SetCWideStringManager;
  294. { init conversion tables }
  295. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  296. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  297. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  298. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  299. finalization
  300. iconv_close(iconv_ansi2wide);
  301. end.