cwstring.pp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$ifndef linux} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$define useiconv}
  21. {$endif linux}
  22. Uses
  23. BaseUnix,
  24. ctypes,
  25. unix,
  26. unixtype,
  27. sysutils,
  28. initc;
  29. Const
  30. {$ifndef useiconv}
  31. libiconvname='c'; // is in libc under Linux.
  32. {$else}
  33. libiconvname='iconv';
  34. {$endif}
  35. { Case-mapping "arrays" }
  36. var
  37. AnsiUpperChars: AnsiString; // 1..255
  38. AnsiLowerChars: AnsiString; // 1..255
  39. WideUpperChars: WideString; // 1..65535
  40. WideLowerChars: WideString; // 1..65535
  41. { the following declarations are from the libc unit for linux so they
  42. might be very linux centric
  43. maybe this needs to be splitted in an os depend way later }
  44. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  45. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  46. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  47. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  48. const
  49. {$ifdef linux}
  50. __LC_CTYPE = 0;
  51. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  52. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  53. CODESET = _NL_CTYPE_CODESET_NAME;
  54. {$else linux}
  55. {$ifdef darwin}
  56. CODESET = 0;
  57. {$else darwin}
  58. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  59. __LC_CTYPE = 0;
  60. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  61. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  62. CODESET = _NL_CTYPE_CODESET_NAME;
  63. {$else freebsd}
  64. {$ifdef solaris}
  65. CODESET=49;
  66. {$else}
  67. {$error lookup the value of CODESET in /usr/include/langinfo.h for your OS }
  68. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  69. {$endif solaris}
  70. {$endif FreeBSD}
  71. {$endif darwin}
  72. {$endif linux}
  73. { unicode encoding name }
  74. {$ifdef FPC_LITTLE_ENDIAN}
  75. unicode_encoding = 'UNICODELITTLE';
  76. {$else FPC_LITTLE_ENDIAN}
  77. unicode_encoding = 'UNICODEBIG';
  78. {$endif FPC_LITTLE_ENDIAN}
  79. type
  80. piconv_t = ^iconv_t;
  81. iconv_t = pointer;
  82. nl_item = cint;
  83. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  84. {$ifndef bsd}
  85. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  86. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  87. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  88. {$else}
  89. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  90. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  91. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  92. {$endif}
  93. var
  94. iconv_ansi2ucs4,
  95. iconv_ucs42ansi,
  96. iconv_ansi2wide,
  97. iconv_wide2ansi : iconv_t;
  98. lock_ansi2ucs4 : integer = -1;
  99. lock_ucs42ansi : integer = -1;
  100. lock_ansi2wide : integer = -1;
  101. lock_wide2ansi : integer = -1;
  102. procedure lockiconv(var lockcount: integer);
  103. begin
  104. while interlockedincrement(lockcount) <> 0 do begin
  105. interlockeddecrement(lockcount);
  106. sleep(0);
  107. end;
  108. end;
  109. procedure unlockiconv(var lockcount: integer);
  110. begin
  111. interlockeddecrement(lockcount);
  112. end;
  113. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  114. var
  115. outlength,
  116. outoffset,
  117. srclen,
  118. outleft : size_t;
  119. srcpos : pwidechar;
  120. destpos: pchar;
  121. mynil : pchar;
  122. my0 : size_t;
  123. begin
  124. mynil:=nil;
  125. my0:=0;
  126. { rought estimation }
  127. setlength(dest,len*3);
  128. outlength:=len*3;
  129. srclen:=len*2;
  130. srcpos:=source;
  131. destpos:=pchar(dest);
  132. outleft:=outlength;
  133. lockiconv(lock_wide2ansi);
  134. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  135. begin
  136. case fpgetCerrno of
  137. ESysEILSEQ:
  138. begin
  139. { skip and set to '?' }
  140. inc(srcpos);
  141. dec(srclen,2);
  142. destpos^:='?';
  143. inc(destpos);
  144. dec(outleft);
  145. { reset }
  146. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  147. end;
  148. ESysE2BIG:
  149. begin
  150. outoffset:=destpos-pchar(dest);
  151. { extend }
  152. setlength(dest,outlength+len*3);
  153. inc(outleft,len*3);
  154. inc(outlength,len*3);
  155. { string could have been moved }
  156. destpos:=pchar(dest)+outoffset;
  157. end;
  158. else
  159. begin
  160. unlockiconv(lock_wide2ansi);
  161. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  162. end;
  163. end;
  164. end;
  165. unlockiconv(lock_wide2ansi);
  166. // truncate string
  167. setlength(dest,length(dest)-outleft);
  168. end;
  169. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  170. var
  171. outlength,
  172. outoffset,
  173. outleft : size_t;
  174. srcpos,
  175. destpos: pchar;
  176. mynil : pchar;
  177. my0 : size_t;
  178. begin
  179. mynil:=nil;
  180. my0:=0;
  181. // extra space
  182. outlength:=len+1;
  183. setlength(dest,outlength);
  184. outlength:=len+1;
  185. srcpos:=source;
  186. destpos:=pchar(dest);
  187. outleft:=outlength*2;
  188. lockiconv(lock_ansi2wide);
  189. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  190. begin
  191. case fpgetCerrno of
  192. ESysEILSEQ:
  193. begin
  194. { skip and set to '?' }
  195. inc(srcpos);
  196. pwidechar(destpos)^:='?';
  197. inc(destpos,2);
  198. dec(outleft,2);
  199. { reset }
  200. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  201. end;
  202. ESysE2BIG:
  203. begin
  204. outoffset:=destpos-pchar(dest);
  205. { extend }
  206. setlength(dest,outlength+len);
  207. inc(outleft,len*2);
  208. inc(outlength,len);
  209. { string could have been moved }
  210. destpos:=pchar(dest)+outoffset;
  211. end;
  212. else
  213. begin
  214. unlockiconv(lock_ansi2wide);
  215. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  216. end;
  217. end;
  218. end;
  219. unlockiconv(lock_ansi2wide);
  220. // truncate string
  221. setlength(dest,length(dest)-outleft div 2);
  222. end;
  223. function LowerWideString(const s : WideString) : WideString;
  224. var
  225. i : SizeInt;
  226. begin
  227. SetLength(result,length(s));
  228. for i:=1 to length(s) do
  229. result[i]:=WideChar(towlower(wint_t(s[i])));
  230. end;
  231. function UpperWideString(const s : WideString) : WideString;
  232. var
  233. i : SizeInt;
  234. begin
  235. SetLength(result,length(s));
  236. for i:=1 to length(s) do
  237. result[i]:=WideChar(towupper(wint_t(s[i])));
  238. end;
  239. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  240. var
  241. outlength,
  242. outoffset,
  243. outleft : size_t;
  244. srcpos,
  245. destpos: pchar;
  246. mynil : pchar;
  247. my0 : size_t;
  248. begin
  249. mynil:=nil;
  250. my0:=0;
  251. // extra space
  252. outlength:=len+1;
  253. setlength(dest,outlength);
  254. outlength:=len+1;
  255. srcpos:=source;
  256. destpos:=pchar(dest);
  257. outleft:=outlength*4;
  258. lockiconv(lock_ansi2ucs4);
  259. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  260. begin
  261. case fpgetCerrno of
  262. ESysE2BIG:
  263. begin
  264. outoffset:=destpos-pchar(dest);
  265. { extend }
  266. setlength(dest,outlength+len);
  267. inc(outleft,len*4);
  268. inc(outlength,len);
  269. { string could have been moved }
  270. destpos:=pchar(dest)+outoffset;
  271. end;
  272. else
  273. begin
  274. unlockiconv(lock_ansi2ucs4);
  275. raise EConvertError.Create('iconv error '+IntToStr(fpgetCerrno));
  276. end;
  277. end;
  278. end;
  279. unlockiconv(lock_ansi2ucs4);
  280. // truncate string
  281. setlength(dest,length(dest)-outleft div 4);
  282. end;
  283. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  284. var
  285. hs1,hs2 : UCS4String;
  286. begin
  287. hs1:=WideStringToUCS4String(s1);
  288. hs2:=WideStringToUCS4String(s2);
  289. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  290. end;
  291. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  292. begin
  293. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  294. end;
  295. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  296. begin
  297. result:=strcoll(s1,s2);
  298. end;
  299. Procedure SetCWideStringManager;
  300. Var
  301. CWideStringManager : TWideStringManager;
  302. begin
  303. CWideStringManager:=widestringmanager;
  304. With CWideStringManager do
  305. begin
  306. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  307. Ansi2WideMoveProc:=@Ansi2WideMove;
  308. UpperWideStringProc:=@UpperWideString;
  309. LowerWideStringProc:=@LowerWideString;
  310. CompareWideStringProc:=@CompareWideString;
  311. CompareTextWideStringProc:=@CompareTextWideString;
  312. {
  313. CharLengthPCharProc
  314. UpperAnsiStringProc
  315. LowerAnsiStringProc
  316. CompareStrAnsiStringProc
  317. CompareTextAnsiStringProc
  318. }
  319. StrCompAnsiStringProc:=@StrCompAnsi;
  320. {
  321. StrICompAnsiStringProc
  322. StrLCompAnsiStringProc
  323. StrLICompAnsiStringProc
  324. StrLowerAnsiStringProc
  325. StrUpperAnsiStringProc
  326. }
  327. end;
  328. SetWideStringManager(CWideStringManager);
  329. end;
  330. initialization
  331. SetCWideStringManager;
  332. { init conversion tables }
  333. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  334. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  335. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  336. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  337. finalization
  338. iconv_close(iconv_ansi2wide);
  339. end.