2
0

cwstring.pp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$define useiconv}
  21. {$endif linux}
  22. Uses
  23. BaseUnix,
  24. ctypes,
  25. unix,
  26. unixtype,
  27. initc;
  28. Const
  29. {$ifndef useiconv}
  30. libiconvname='c'; // is in libc under Linux.
  31. {$else}
  32. libiconvname='iconv';
  33. {$endif}
  34. { Case-mapping "arrays" }
  35. var
  36. AnsiUpperChars: AnsiString; // 1..255
  37. AnsiLowerChars: AnsiString; // 1..255
  38. WideUpperChars: WideString; // 1..65535
  39. WideLowerChars: WideString; // 1..65535
  40. { the following declarations are from the libc unit for linux so they
  41. might be very linux centric
  42. maybe this needs to be splitted in an os depend way later }
  43. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  44. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  45. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  46. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  47. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  48. const
  49. {$ifdef linux}
  50. __LC_CTYPE = 0;
  51. LC_ALL = 6;
  52. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  53. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  54. CODESET = _NL_CTYPE_CODESET_NAME;
  55. {$else linux}
  56. {$ifdef darwin}
  57. CODESET = 0;
  58. LC_ALL = 0;
  59. {$else darwin}
  60. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  61. __LC_CTYPE = 0;
  62. LC_ALL = 0;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  66. {$else freebsd}
  67. {$ifdef solaris}
  68. CODESET=49;
  69. LC_ALL = 6;
  70. {$else}
  71. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  72. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  73. {$endif solaris}
  74. {$endif FreeBSD}
  75. {$endif darwin}
  76. {$endif linux}
  77. { unicode encoding name }
  78. {$ifdef FPC_LITTLE_ENDIAN}
  79. unicode_encoding = 'UTF-16LE';
  80. {$else FPC_LITTLE_ENDIAN}
  81. unicode_encoding = 'UTF-16BE';
  82. {$endif FPC_LITTLE_ENDIAN}
  83. type
  84. piconv_t = ^iconv_t;
  85. iconv_t = pointer;
  86. nl_item = cint;
  87. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  88. {$ifndef bsd}
  89. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  90. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  91. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  92. {$else}
  93. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  94. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  95. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  96. {$endif}
  97. var
  98. iconv_ansi2ucs4,
  99. iconv_ucs42ansi,
  100. iconv_ansi2wide,
  101. iconv_wide2ansi : iconv_t;
  102. lock_ansi2ucs4 : integer = -1;
  103. lock_ucs42ansi : integer = -1;
  104. lock_ansi2wide : integer = -1;
  105. lock_wide2ansi : integer = -1;
  106. iconv_lock : TRTLcriticalsection;
  107. {
  108. procedure lockiconv(var lockcount: integer);
  109. begin
  110. while interlockedincrement(lockcount) <> 0 do begin
  111. interlockeddecrement(lockcount);
  112. sleep(0);
  113. end;
  114. end;
  115. procedure unlockiconv(var lockcount: integer);
  116. begin
  117. interlockeddecrement(lockcount);
  118. end;
  119. }
  120. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  121. var
  122. outlength,
  123. outoffset,
  124. srclen,
  125. outleft : size_t;
  126. srcpos : pwidechar;
  127. destpos: pchar;
  128. mynil : pchar;
  129. my0 : size_t;
  130. begin
  131. mynil:=nil;
  132. my0:=0;
  133. { rought estimation }
  134. setlength(dest,len*3);
  135. outlength:=len*3;
  136. srclen:=len*2;
  137. srcpos:=source;
  138. destpos:=pchar(dest);
  139. outleft:=outlength;
  140. entercriticalsection(iconv_lock);
  141. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  142. begin
  143. case fpgetCerrno of
  144. ESysEILSEQ:
  145. begin
  146. { skip and set to '?' }
  147. inc(srcpos);
  148. dec(srclen,2);
  149. destpos^:='?';
  150. inc(destpos);
  151. dec(outleft);
  152. { reset }
  153. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  154. end;
  155. ESysE2BIG:
  156. begin
  157. outoffset:=destpos-pchar(dest);
  158. { extend }
  159. setlength(dest,outlength+len*3);
  160. inc(outleft,len*3);
  161. inc(outlength,len*3);
  162. { string could have been moved }
  163. destpos:=pchar(dest)+outoffset;
  164. end;
  165. else
  166. leavecriticalsection(iconv_lock);
  167. runerror(231);
  168. end;
  169. end;
  170. leavecriticalsection(iconv_lock);
  171. // truncate string
  172. setlength(dest,length(dest)-outleft);
  173. end;
  174. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  175. var
  176. outlength,
  177. outoffset,
  178. outleft : size_t;
  179. srcpos,
  180. destpos: pchar;
  181. mynil : pchar;
  182. my0 : size_t;
  183. begin
  184. mynil:=nil;
  185. my0:=0;
  186. // extra space
  187. outlength:=len+1;
  188. setlength(dest,outlength);
  189. outlength:=len+1;
  190. srcpos:=source;
  191. destpos:=pchar(dest);
  192. outleft:=outlength*2;
  193. entercriticalsection(iconv_lock);
  194. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  195. begin
  196. case fpgetCerrno of
  197. ESysEILSEQ:
  198. begin
  199. { skip and set to '?' }
  200. inc(srcpos);
  201. dec(len);
  202. pwidechar(destpos)^:='?';
  203. inc(destpos,2);
  204. dec(outleft,2);
  205. { reset }
  206. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  207. end;
  208. ESysE2BIG:
  209. begin
  210. outoffset:=destpos-pchar(dest);
  211. { extend }
  212. setlength(dest,outlength+len);
  213. inc(outleft,len*2);
  214. inc(outlength,len);
  215. { string could have been moved }
  216. destpos:=pchar(dest)+outoffset;
  217. end;
  218. else
  219. leavecriticalsection(iconv_lock);
  220. runerror(231);
  221. end;
  222. end;
  223. leavecriticalsection(iconv_lock);
  224. // truncate string
  225. setlength(dest,length(dest)-outleft div 2);
  226. end;
  227. function LowerWideString(const s : WideString) : WideString;
  228. var
  229. i : SizeInt;
  230. begin
  231. SetLength(result,length(s));
  232. for i:=1 to length(s) do
  233. result[i]:=WideChar(towlower(wint_t(s[i])));
  234. end;
  235. function UpperWideString(const s : WideString) : WideString;
  236. var
  237. i : SizeInt;
  238. begin
  239. SetLength(result,length(s));
  240. for i:=1 to length(s) do
  241. result[i]:=WideChar(towupper(wint_t(s[i])));
  242. end;
  243. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  244. var
  245. outlength,
  246. outoffset,
  247. outleft : size_t;
  248. srcpos,
  249. destpos: pchar;
  250. mynil : pchar;
  251. my0 : size_t;
  252. begin
  253. mynil:=nil;
  254. my0:=0;
  255. // extra space
  256. outlength:=len+1;
  257. setlength(dest,outlength);
  258. outlength:=len+1;
  259. srcpos:=source;
  260. destpos:=pchar(dest);
  261. outleft:=outlength*4;
  262. entercriticalsection(iconv_lock);
  263. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  264. begin
  265. case fpgetCerrno of
  266. ESysE2BIG:
  267. begin
  268. outoffset:=destpos-pchar(dest);
  269. { extend }
  270. setlength(dest,outlength+len);
  271. inc(outleft,len*4);
  272. inc(outlength,len);
  273. { string could have been moved }
  274. destpos:=pchar(dest)+outoffset;
  275. end;
  276. else
  277. leavecriticalsection(iconv_lock);
  278. runerror(231);
  279. end;
  280. end;
  281. leavecriticalsection(iconv_lock);
  282. // truncate string
  283. setlength(dest,length(dest)-outleft div 4);
  284. end;
  285. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  286. var
  287. hs1,hs2 : UCS4String;
  288. begin
  289. hs1:=WideStringToUCS4String(s1);
  290. hs2:=WideStringToUCS4String(s2);
  291. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  292. end;
  293. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  294. begin
  295. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  296. end;
  297. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  298. begin
  299. result:=strcoll(s1,s2);
  300. end;
  301. Procedure SetCWideStringManager;
  302. Var
  303. CWideStringManager : TWideStringManager;
  304. begin
  305. CWideStringManager:=widestringmanager;
  306. With CWideStringManager do
  307. begin
  308. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  309. Ansi2WideMoveProc:=@Ansi2WideMove;
  310. UpperWideStringProc:=@UpperWideString;
  311. LowerWideStringProc:=@LowerWideString;
  312. CompareWideStringProc:=@CompareWideString;
  313. CompareTextWideStringProc:=@CompareTextWideString;
  314. {
  315. CharLengthPCharProc
  316. UpperAnsiStringProc
  317. LowerAnsiStringProc
  318. CompareStrAnsiStringProc
  319. CompareTextAnsiStringProc
  320. }
  321. StrCompAnsiStringProc:=@StrCompAnsi;
  322. {
  323. StrICompAnsiStringProc
  324. StrLCompAnsiStringProc
  325. StrLICompAnsiStringProc
  326. StrLowerAnsiStringProc
  327. StrUpperAnsiStringProc
  328. }
  329. end;
  330. SetWideStringManager(CWideStringManager);
  331. end;
  332. initialization
  333. SetCWideStringManager;
  334. initcriticalsection(iconv_lock);
  335. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  336. { with the information from the environment variables according to POSIX }
  337. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  338. setlocale(LC_ALL,'');
  339. { init conversion tables }
  340. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  341. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  342. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  343. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  344. finalization
  345. donecriticalsection(iconv_lock);
  346. iconv_close(iconv_ansi2wide);
  347. end.