cwstring.pp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$ifndef linux} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$ifndef FreeBSD5}
  20. {$linklib iconv}
  21. {$define useiconv}
  22. {$endif}
  23. {$endif linux}
  24. Uses
  25. BaseUnix,
  26. ctypes,
  27. unix,
  28. unixtype,
  29. sysutils,
  30. initc;
  31. Const
  32. {$ifndef useiconv}
  33. libiconvname='c'; // is in libc under Linux.
  34. {$else}
  35. libiconvname='iconv';
  36. {$endif}
  37. { Case-mapping "arrays" }
  38. var
  39. AnsiUpperChars: AnsiString; // 1..255
  40. AnsiLowerChars: AnsiString; // 1..255
  41. WideUpperChars: WideString; // 1..65535
  42. WideLowerChars: WideString; // 1..65535
  43. { the following declarations are from the libc unit for linux so they
  44. might be very linux centric
  45. maybe this needs to be splitted in an os depend way later }
  46. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  47. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  48. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  49. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  50. const
  51. {$ifdef linux}
  52. __LC_CTYPE = 0;
  53. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  54. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  55. CODESET = _NL_CTYPE_CODESET_NAME;
  56. {$else linux}
  57. {$ifdef darwin}
  58. CODESET = 0;
  59. {$else darwin}
  60. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  61. CODESET = 0;
  62. {$else freebsd}
  63. {$ifdef solaris}
  64. CODESET=49;
  65. {$else}
  66. {$error lookup the value of CODESET in /usr/include/langinfo.h for your OS }
  67. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  68. {$endif solaris}
  69. {$endif FreeBSD}
  70. {$endif darwin}
  71. {$endif linux}
  72. { unicode encoding name }
  73. {$ifdef FPC_LITTLE_ENDIAN}
  74. unicode_encoding = 'UNICODELITTLE';
  75. {$else FPC_LITTLE_ENDIAN}
  76. unicode_encoding = 'UNICODEBIG';
  77. {$endif FPC_LITTLE_ENDIAN}
  78. type
  79. piconv_t = ^iconv_t;
  80. iconv_t = pointer;
  81. nl_item = cint;
  82. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  83. {$ifndef Darwin}
  84. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  85. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  86. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  87. {$else}
  88. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  89. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  90. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  91. {$endif}
  92. var
  93. iconv_ansi2ucs4,
  94. iconv_ucs42ansi,
  95. iconv_ansi2wide,
  96. iconv_wide2ansi : iconv_t;
  97. lock_ansi2ucs4 : integer = -1;
  98. lock_ucs42ansi : integer = -1;
  99. lock_ansi2wide : integer = -1;
  100. lock_wide2ansi : integer = -1;
  101. procedure lockiconv(var lockcount: integer);
  102. begin
  103. while interlockedincrement(lockcount) <> 0 do begin
  104. interlockeddecrement(lockcount);
  105. sleep(0);
  106. end;
  107. end;
  108. procedure unlockiconv(var lockcount: integer);
  109. begin
  110. interlockeddecrement(lockcount);
  111. end;
  112. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  113. var
  114. outlength,
  115. outoffset,
  116. srclen,
  117. outleft : size_t;
  118. srcpos : pwidechar;
  119. destpos: pchar;
  120. mynil : pchar;
  121. my0 : size_t;
  122. begin
  123. mynil:=nil;
  124. my0:=0;
  125. { rought estimation }
  126. setlength(dest,len*3);
  127. outlength:=len*3;
  128. srclen:=len*2;
  129. srcpos:=source;
  130. destpos:=pchar(dest);
  131. outleft:=outlength;
  132. lockiconv(lock_wide2ansi);
  133. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  134. begin
  135. case fpgetCerrno of
  136. ESysEILSEQ:
  137. begin
  138. { skip and set to '?' }
  139. inc(srcpos);
  140. dec(srclen,2);
  141. destpos^:='?';
  142. inc(destpos);
  143. dec(outleft);
  144. { reset }
  145. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  146. end;
  147. ESysE2BIG:
  148. begin
  149. outoffset:=destpos-pchar(dest);
  150. { extend }
  151. setlength(dest,outlength+len*3);
  152. inc(outleft,len*3);
  153. inc(outlength,len*3);
  154. { string could have been moved }
  155. destpos:=pchar(dest)+outoffset;
  156. end;
  157. else
  158. begin
  159. unlockiconv(lock_wide2ansi);
  160. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  161. end;
  162. end;
  163. end;
  164. unlockiconv(lock_wide2ansi);
  165. // truncate string
  166. setlength(dest,length(dest)-outleft);
  167. end;
  168. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  169. var
  170. outlength,
  171. outoffset,
  172. outleft : size_t;
  173. srcpos,
  174. destpos: pchar;
  175. mynil : pchar;
  176. my0 : size_t;
  177. begin
  178. mynil:=nil;
  179. my0:=0;
  180. // extra space
  181. outlength:=len+1;
  182. setlength(dest,outlength);
  183. outlength:=len+1;
  184. srcpos:=source;
  185. destpos:=pchar(dest);
  186. outleft:=outlength*2;
  187. lockiconv(lock_ansi2wide);
  188. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  189. begin
  190. case fpgetCerrno of
  191. ESysEILSEQ:
  192. begin
  193. { skip and set to '?' }
  194. inc(srcpos);
  195. pwidechar(destpos)^:='?';
  196. inc(destpos,2);
  197. dec(outleft,2);
  198. { reset }
  199. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  200. end;
  201. ESysE2BIG:
  202. begin
  203. outoffset:=destpos-pchar(dest);
  204. { extend }
  205. setlength(dest,outlength+len);
  206. inc(outleft,len*2);
  207. inc(outlength,len);
  208. { string could have been moved }
  209. destpos:=pchar(dest)+outoffset;
  210. end;
  211. else
  212. begin
  213. unlockiconv(lock_ansi2wide);
  214. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  215. end;
  216. end;
  217. end;
  218. unlockiconv(lock_ansi2wide);
  219. // truncate string
  220. setlength(dest,length(dest)-outleft div 2);
  221. end;
  222. function LowerWideString(const s : WideString) : WideString;
  223. var
  224. i : SizeInt;
  225. begin
  226. SetLength(result,length(s));
  227. for i:=1 to length(s) do
  228. result[i]:=WideChar(towlower(wint_t(s[i])));
  229. end;
  230. function UpperWideString(const s : WideString) : WideString;
  231. var
  232. i : SizeInt;
  233. begin
  234. SetLength(result,length(s));
  235. for i:=1 to length(s) do
  236. result[i]:=WideChar(towupper(wint_t(s[i])));
  237. end;
  238. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  239. var
  240. outlength,
  241. outoffset,
  242. outleft : size_t;
  243. srcpos,
  244. destpos: pchar;
  245. mynil : pchar;
  246. my0 : size_t;
  247. begin
  248. mynil:=nil;
  249. my0:=0;
  250. // extra space
  251. outlength:=len+1;
  252. setlength(dest,outlength);
  253. outlength:=len+1;
  254. srcpos:=source;
  255. destpos:=pchar(dest);
  256. outleft:=outlength*4;
  257. lockiconv(lock_ansi2ucs4);
  258. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  259. begin
  260. case fpgetCerrno of
  261. ESysE2BIG:
  262. begin
  263. outoffset:=destpos-pchar(dest);
  264. { extend }
  265. setlength(dest,outlength+len);
  266. inc(outleft,len*4);
  267. inc(outlength,len);
  268. { string could have been moved }
  269. destpos:=pchar(dest)+outoffset;
  270. end;
  271. else
  272. begin
  273. unlockiconv(lock_ansi2ucs4);
  274. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  275. end;
  276. end;
  277. end;
  278. unlockiconv(lock_ansi2ucs4);
  279. // truncate string
  280. setlength(dest,length(dest)-outleft div 4);
  281. end;
  282. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  283. var
  284. hs1,hs2 : UCS4String;
  285. begin
  286. hs1:=WideStringToUCS4String(s1);
  287. hs2:=WideStringToUCS4String(s2);
  288. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  289. end;
  290. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  291. begin
  292. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  293. end;
  294. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  295. begin
  296. result:=strcoll(s1,s2);
  297. end;
  298. Procedure SetCWideStringManager;
  299. Var
  300. CWideStringManager : TWideStringManager;
  301. begin
  302. CWideStringManager:=widestringmanager;
  303. With CWideStringManager do
  304. begin
  305. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  306. Ansi2WideMoveProc:=@Ansi2WideMove;
  307. UpperWideStringProc:=@UpperWideString;
  308. LowerWideStringProc:=@LowerWideString;
  309. CompareWideStringProc:=@CompareWideString;
  310. CompareTextWideStringProc:=@CompareTextWideString;
  311. {
  312. CharLengthPCharProc
  313. UpperAnsiStringProc
  314. LowerAnsiStringProc
  315. CompareStrAnsiStringProc
  316. CompareTextAnsiStringProc
  317. }
  318. StrCompAnsiStringProc:=@StrCompAnsi;
  319. {
  320. StrICompAnsiStringProc
  321. StrLCompAnsiStringProc
  322. StrLICompAnsiStringProc
  323. StrLowerAnsiStringProc
  324. StrUpperAnsiStringProc
  325. }
  326. end;
  327. SetWideStringManager(CWideStringManager);
  328. end;
  329. initialization
  330. SetCWideStringManager;
  331. { init conversion tables }
  332. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  333. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  334. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  335. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  336. finalization
  337. iconv_close(iconv_ansi2wide);
  338. end.