cwstring.pp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. libc based wide string support
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit cwstring;
  14. interface
  15. procedure SetCWidestringManager;
  16. implementation
  17. {$linklib c}
  18. {$if not defined(linux) and not defined(solaris)} // Linux (and maybe glibc platforms in general), have iconv in glibc.
  19. {$linklib iconv}
  20. {$define useiconv}
  21. {$endif linux}
  22. Uses
  23. BaseUnix,
  24. ctypes,
  25. unix,
  26. unixtype,
  27. initc;
  28. Const
  29. {$ifndef useiconv}
  30. libiconvname='c'; // is in libc under Linux.
  31. {$else}
  32. libiconvname='iconv';
  33. {$endif}
  34. { Case-mapping "arrays" }
  35. var
  36. AnsiUpperChars: AnsiString; // 1..255
  37. AnsiLowerChars: AnsiString; // 1..255
  38. WideUpperChars: WideString; // 1..65535
  39. WideLowerChars: WideString; // 1..65535
  40. { the following declarations are from the libc unit for linux so they
  41. might be very linux centric
  42. maybe this needs to be splitted in an os depend way later }
  43. function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
  44. function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
  45. function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
  46. function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
  47. function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
  48. const
  49. {$ifdef linux}
  50. __LC_CTYPE = 0;
  51. LC_ALL = 6;
  52. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  53. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  54. CODESET = _NL_CTYPE_CODESET_NAME;
  55. {$else linux}
  56. {$ifdef darwin}
  57. CODESET = 0;
  58. LC_ALL = 0;
  59. {$else darwin}
  60. {$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
  61. __LC_CTYPE = 0;
  62. LC_ALL = 0;
  63. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  64. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  65. CODESET = 0; // _NL_CTYPE_CODESET_NAME;
  66. {$else freebsd}
  67. {$ifdef solaris}
  68. CODESET=49;
  69. LC_ALL = 6;
  70. {$else solaris}
  71. {$ifdef beos}
  72. {$warning check correct value for BeOS}
  73. CODESET=49;
  74. LC_ALL = 6; // Checked for BeOS, but 0 under Haiku...
  75. ESysEILSEQ = EILSEQ;
  76. {$else}
  77. {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
  78. // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
  79. {$endif beos}
  80. {$endif solaris}
  81. {$endif FreeBSD}
  82. {$endif darwin}
  83. {$endif linux}
  84. { unicode encoding name }
  85. {$ifdef FPC_LITTLE_ENDIAN}
  86. unicode_encoding = 'UTF-16LE';
  87. {$else FPC_LITTLE_ENDIAN}
  88. unicode_encoding = 'UTF-16BE';
  89. {$endif FPC_LITTLE_ENDIAN}
  90. type
  91. piconv_t = ^iconv_t;
  92. iconv_t = pointer;
  93. nl_item = cint;
  94. {$ifndef beos}
  95. function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
  96. {$endif}
  97. { $ ifndef bsd}
  98. {$if not defined(bsd) and not defined(beos)}
  99. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
  100. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
  101. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
  102. {$else}
  103. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
  104. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
  105. function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
  106. {$endif}
  107. var
  108. iconv_ansi2ucs4,
  109. iconv_ucs42ansi,
  110. iconv_ansi2wide,
  111. iconv_wide2ansi : iconv_t;
  112. lock_ansi2ucs4 : integer = -1;
  113. lock_ucs42ansi : integer = -1;
  114. lock_ansi2wide : integer = -1;
  115. lock_wide2ansi : integer = -1;
  116. iconv_lock : TRTLcriticalsection;
  117. {
  118. procedure lockiconv(var lockcount: integer);
  119. begin
  120. while interlockedincrement(lockcount) <> 0 do begin
  121. interlockeddecrement(lockcount);
  122. sleep(0);
  123. end;
  124. end;
  125. procedure unlockiconv(var lockcount: integer);
  126. begin
  127. interlockeddecrement(lockcount);
  128. end;
  129. }
  130. {$ifdef beos}
  131. function nl_langinfo(__item:nl_item):pchar;
  132. begin
  133. {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
  134. Result := '';
  135. end;
  136. {$endif}
  137. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  138. var
  139. outlength,
  140. outoffset,
  141. srclen,
  142. outleft : size_t;
  143. srcpos : pwidechar;
  144. destpos: pchar;
  145. mynil : pchar;
  146. my0 : size_t;
  147. begin
  148. mynil:=nil;
  149. my0:=0;
  150. { rought estimation }
  151. setlength(dest,len*3);
  152. outlength:=len*3;
  153. srclen:=len*2;
  154. srcpos:=source;
  155. destpos:=pchar(dest);
  156. outleft:=outlength;
  157. entercriticalsection(iconv_lock);
  158. while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
  159. begin
  160. case fpgetCerrno of
  161. ESysEILSEQ:
  162. begin
  163. { skip and set to '?' }
  164. inc(srcpos);
  165. dec(srclen,2);
  166. destpos^:='?';
  167. inc(destpos);
  168. dec(outleft);
  169. { reset }
  170. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  171. end;
  172. ESysE2BIG:
  173. begin
  174. outoffset:=destpos-pchar(dest);
  175. { extend }
  176. setlength(dest,outlength+len*3);
  177. inc(outleft,len*3);
  178. inc(outlength,len*3);
  179. { string could have been moved }
  180. destpos:=pchar(dest)+outoffset;
  181. end;
  182. else
  183. leavecriticalsection(iconv_lock);
  184. runerror(231);
  185. end;
  186. end;
  187. leavecriticalsection(iconv_lock);
  188. // truncate string
  189. setlength(dest,length(dest)-outleft);
  190. end;
  191. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  192. var
  193. outlength,
  194. outoffset,
  195. outleft : size_t;
  196. srcpos,
  197. destpos: pchar;
  198. mynil : pchar;
  199. my0 : size_t;
  200. begin
  201. mynil:=nil;
  202. my0:=0;
  203. // extra space
  204. outlength:=len+1;
  205. setlength(dest,outlength);
  206. outlength:=len+1;
  207. srcpos:=source;
  208. destpos:=pchar(dest);
  209. outleft:=outlength*2;
  210. entercriticalsection(iconv_lock);
  211. while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  212. begin
  213. case fpgetCerrno of
  214. ESysEILSEQ:
  215. begin
  216. { skip and set to '?' }
  217. inc(srcpos);
  218. dec(len);
  219. pwidechar(destpos)^:='?';
  220. inc(destpos,2);
  221. dec(outleft,2);
  222. { reset }
  223. iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
  224. end;
  225. ESysE2BIG:
  226. begin
  227. outoffset:=destpos-pchar(dest);
  228. { extend }
  229. setlength(dest,outlength+len);
  230. inc(outleft,len*2);
  231. inc(outlength,len);
  232. { string could have been moved }
  233. destpos:=pchar(dest)+outoffset;
  234. end;
  235. else
  236. leavecriticalsection(iconv_lock);
  237. runerror(231);
  238. end;
  239. end;
  240. leavecriticalsection(iconv_lock);
  241. // truncate string
  242. setlength(dest,length(dest)-outleft div 2);
  243. end;
  244. function LowerWideString(const s : WideString) : WideString;
  245. var
  246. i : SizeInt;
  247. begin
  248. SetLength(result,length(s));
  249. for i:=1 to length(s) do
  250. result[i]:=WideChar(towlower(wint_t(s[i])));
  251. end;
  252. function UpperWideString(const s : WideString) : WideString;
  253. var
  254. i : SizeInt;
  255. begin
  256. SetLength(result,length(s));
  257. for i:=1 to length(s) do
  258. result[i]:=WideChar(towupper(wint_t(s[i])));
  259. end;
  260. procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
  261. var
  262. outlength,
  263. outoffset,
  264. outleft : size_t;
  265. srcpos,
  266. destpos: pchar;
  267. mynil : pchar;
  268. my0 : size_t;
  269. begin
  270. mynil:=nil;
  271. my0:=0;
  272. // extra space
  273. outlength:=len+1;
  274. setlength(dest,outlength);
  275. outlength:=len+1;
  276. srcpos:=source;
  277. destpos:=pchar(dest);
  278. outleft:=outlength*4;
  279. entercriticalsection(iconv_lock);
  280. while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
  281. begin
  282. case fpgetCerrno of
  283. ESysE2BIG:
  284. begin
  285. outoffset:=destpos-pchar(dest);
  286. { extend }
  287. setlength(dest,outlength+len);
  288. inc(outleft,len*4);
  289. inc(outlength,len);
  290. { string could have been moved }
  291. destpos:=pchar(dest)+outoffset;
  292. end;
  293. else
  294. leavecriticalsection(iconv_lock);
  295. runerror(231);
  296. end;
  297. end;
  298. leavecriticalsection(iconv_lock);
  299. // truncate string
  300. setlength(dest,length(dest)-outleft div 4);
  301. end;
  302. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  303. var
  304. hs1,hs2 : UCS4String;
  305. begin
  306. hs1:=WideStringToUCS4String(s1);
  307. hs2:=WideStringToUCS4String(s2);
  308. result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
  309. end;
  310. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  311. begin
  312. result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
  313. end;
  314. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  315. begin
  316. result:=strcoll(s1,s2);
  317. end;
  318. Procedure SetCWideStringManager;
  319. Var
  320. CWideStringManager : TWideStringManager;
  321. begin
  322. CWideStringManager:=widestringmanager;
  323. With CWideStringManager do
  324. begin
  325. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  326. Ansi2WideMoveProc:=@Ansi2WideMove;
  327. UpperWideStringProc:=@UpperWideString;
  328. LowerWideStringProc:=@LowerWideString;
  329. CompareWideStringProc:=@CompareWideString;
  330. CompareTextWideStringProc:=@CompareTextWideString;
  331. {
  332. CharLengthPCharProc
  333. UpperAnsiStringProc
  334. LowerAnsiStringProc
  335. CompareStrAnsiStringProc
  336. CompareTextAnsiStringProc
  337. }
  338. StrCompAnsiStringProc:=@StrCompAnsi;
  339. {
  340. StrICompAnsiStringProc
  341. StrLCompAnsiStringProc
  342. StrLICompAnsiStringProc
  343. StrLowerAnsiStringProc
  344. StrUpperAnsiStringProc
  345. }
  346. end;
  347. SetWideStringManager(CWideStringManager);
  348. end;
  349. initialization
  350. SetCWideStringManager;
  351. initcriticalsection(iconv_lock);
  352. { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff }
  353. { with the information from the environment variables according to POSIX }
  354. { (some OSes do this automatically, but e.g. Darwin and Solaris don't) }
  355. setlocale(LC_ALL,'');
  356. { init conversion tables }
  357. iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  358. iconv_ansi2wide:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  359. iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),'UCS4');
  360. iconv_ansi2ucs4:=iconv_open('UCS4',nl_langinfo(CODESET));
  361. finalization
  362. donecriticalsection(iconv_lock);
  363. iconv_close(iconv_ansi2wide);
  364. end.