cwstring.pp 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2005 by Florian Klaempfl,
  5. member of the Free Pascal development team.
  6. libc based wide string support
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. { *********************************************************************** }
  14. { Parts are Copyright (c) 2005 Andreas Hausladen }
  15. { }
  16. { This software is provided 'as-is', without any express or }
  17. { implied warranty. In no event will the author be held liable }
  18. { for any damages arising from the use of this software. }
  19. { }
  20. { Permission is granted to anyone to use this software for any }
  21. { purpose, including commercial applications, and to alter it }
  22. { and redistribute it freely, subject to the following }
  23. { restrictions: }
  24. { }
  25. { 1. The origin of this software must not be misrepresented, }
  26. { you must not claim that you wrote the original software. }
  27. { If you use this software in a product, an acknowledgment }
  28. { in the product documentation would be appreciated but is }
  29. { not required. }
  30. { }
  31. { 2. Altered source versions must be plainly marked as such, and }
  32. { must not be misrepresented as being the original software. }
  33. { }
  34. { 3. This notice may not be removed or altered from any source }
  35. { distribution. }
  36. { }
  37. { *********************************************************************** }
  38. {$mode objfpc}
  39. unit cwstring;
  40. interface
  41. {$linklib c}
  42. Procedure SetCWidestringManager;
  43. implementation
  44. {$linklib c}
  45. Uses
  46. BaseUnix,
  47. ctypes,
  48. unix,
  49. unixtype,
  50. sysutils,
  51. initc;
  52. { Case-mapping "arrays" }
  53. var
  54. AnsiUpperChars: AnsiString; // 1..255
  55. AnsiLowerChars: AnsiString; // 1..255
  56. WideUpperChars: WideString; // 1..65535
  57. WideLowerChars: WideString; // 1..65535
  58. { the following declarations are from the libc unit for linux so they
  59. might be very linux centric
  60. maybe this needs to be splitted in an os depend way later }
  61. function towlower(__wc:wint_t):wint_t;cdecl;external;
  62. function towupper(__wc:wint_t):wint_t;cdecl;external;
  63. function wcscoll(__s1:pwchar_t; __s2:pwchar_t):longint;cdecl;external;
  64. const
  65. __LC_CTYPE = 0;
  66. _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
  67. _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
  68. CODESET = _NL_CTYPE_CODESET_NAME;
  69. { unicode encoding name }
  70. {$ifdef FPC_LITTLE_ENDIAN}
  71. unicode_encoding = 'UNICODELITTLE';
  72. {$else FPC_LITTLE_ENDIAN}
  73. unicode_encoding = 'UNICODEBIG';
  74. {$endif FPC_LITTLE_ENDIAN}
  75. type
  76. piconv_t = ^iconv_t;
  77. iconv_t = pointer;
  78. nl_item = longint;
  79. function nl_langinfo(__item:nl_item):pchar;cdecl;external;
  80. function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external;
  81. function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external;
  82. function iconv_close(__cd:iconv_t):longint;cdecl;external;
  83. var
  84. iconv_ansi2wide,
  85. iconv_wide2ansi : iconv_t;
  86. procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  87. var
  88. outlength,
  89. outoffset,
  90. outleft : size_t;
  91. srcpos : pwidechar;
  92. destpos: pchar;
  93. mynil : pchar;
  94. my0 : size_t;
  95. begin
  96. mynil:=nil;
  97. my0:=0;
  98. { rought estimation }
  99. setlength(dest,len*3);
  100. outlength:=len*3;
  101. srcpos:=source;
  102. destpos:=pchar(dest);
  103. outleft:=outlength;
  104. while iconv(iconv_wide2ansi,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  105. begin
  106. case fpgetCerrno of
  107. ESysEINVAL:
  108. { sometimes it seems to be necessary to reset the conversion context }
  109. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  110. ESysE2BIG:
  111. begin
  112. outoffset:=destpos-pchar(dest);
  113. { extend }
  114. setlength(dest,outlength+len*3);
  115. inc(outleft,len*3);
  116. inc(outlength,len*3);
  117. { string could have been moved }
  118. destpos:=pchar(dest)+outoffset;
  119. end;
  120. else
  121. raise EConvertError.Create('iconv error');
  122. end;
  123. end;
  124. // truncate string
  125. setlength(dest,length(dest)-outleft);
  126. end;
  127. procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  128. var
  129. outlength,
  130. outoffset,
  131. outleft : size_t;
  132. srcpos,
  133. destpos: pchar;
  134. mynil : pchar;
  135. my0 : size_t;
  136. begin
  137. mynil:=nil;
  138. my0:=0;
  139. setlength(dest,len);
  140. outlength:=len;
  141. srcpos:=source;
  142. destpos:=pchar(dest);
  143. outleft:=outlength*2;
  144. while iconv(iconv_ansi2wide,@srcpos,@len,@destpos,@outleft)=size_t(-1) do
  145. begin
  146. case fpgetCerrno of
  147. ESysEINVAL:
  148. { sometimes it seems to be necessary to reset the conversion context }
  149. iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
  150. ESysE2BIG:
  151. begin
  152. outoffset:=destpos-pchar(dest);
  153. { extend }
  154. setlength(dest,outlength+len);
  155. inc(outleft,len*2);
  156. inc(outlength,len);
  157. { string could have been moved }
  158. destpos:=pchar(dest)+outoffset;
  159. end;
  160. else
  161. raise EConvertError.Create('iconv error');
  162. end;
  163. end;
  164. // truncate string
  165. setlength(dest,length(dest)-outleft div 2);
  166. end;
  167. function LowerWideString(const s : WideString) : WideString;
  168. var
  169. i : SizeInt;
  170. begin
  171. SetLength(result,length(s));
  172. for i:=1 to length(s) do
  173. result[i]:=WideChar(towlower(wint_t(s[i])));
  174. end;
  175. function UpperWideString(const s : WideString) : WideString;
  176. var
  177. i : SizeInt;
  178. begin
  179. SetLength(result,length(s));
  180. for i:=1 to length(s) do
  181. result[i]:=WideChar(towupper(wint_t(s[i])));
  182. end;
  183. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  184. begin
  185. end;
  186. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  187. begin
  188. end;
  189. Var
  190. CWideStringManager : TWideStringManager;
  191. Procedure SetCWideStringManager;
  192. begin
  193. With CWideStringManager do
  194. begin
  195. Wide2AnsiMoveProc:=@Wide2AnsiMove;
  196. Ansi2WideMoveProc:=@Ansi2WideMove;
  197. UpperWideStringProc:=@UpperWideString;
  198. LowerWideStringProc:=@LowerWideString;
  199. {
  200. CompareWideStringProc
  201. CompareTextWideStringProc
  202. CharLengthPCharProc
  203. UpperAnsiStringProc
  204. LowerAnsiStringProc
  205. CompareStrAnsiStringProc
  206. CompareTextAnsiStringProc
  207. StrCompAnsiStringProc
  208. StrICompAnsiStringProc
  209. StrLCompAnsiStringProc
  210. StrLICompAnsiStringProc
  211. StrLowerAnsiStringProc
  212. StrUpperAnsiStringProc
  213. }
  214. end;
  215. SetWideStringManager(CWideStringManager);
  216. end;
  217. procedure InitCharArrays;
  218. var
  219. i: longint;
  220. begin
  221. {$ifdef dummy}
  222. // first initialize the WideChar arrays
  223. SetLength(WideUpperChars, +1);
  224. SetLength(WideLowerChars, High(WideChar)+1);
  225. for i := 0 to High(WideChar) do
  226. WideUpperChars[i+1]:=WideChar(towupper(wint_t(i)));
  227. for i := 0 to High(WideChar) do
  228. WideLowerChars[i+1]:=WideChar(towlower(wint_t(i)));
  229. // use the widechar array to initialize the AnsiChar arrays
  230. SetLength(AnsiUpperChars, Byte(High(Char)) + 1);
  231. SetLength(AnsiLowerChars, Byte(High(Char)) + 1);
  232. for i:=0 to High(Char) do
  233. AnsiUpperChars[i+1]:=AnsiChar(i);
  234. for i:=0 to High(Char) do
  235. AnsiLowerChars[i+1]:=AnsiChar(i);
  236. AnsiUpperChars:=WideUpperCase(AnsiUpperChars);
  237. AnsiLowerChars:=WideLowerCase(AnsiLowerChars);
  238. {$endif}
  239. end;
  240. initialization
  241. SetCWideStringManager;
  242. InitCharArrays;
  243. { init conversion tables }
  244. writeln(nl_langinfo(CODESET));
  245. iconv_ansi2wide:=iconv_open(nl_langinfo(CODESET),unicode_encoding);
  246. iconv_wide2ansi:=iconv_open(unicode_encoding,nl_langinfo(CODESET));
  247. finalization
  248. iconv_close(iconv_ansi2wide);
  249. end.
  250. {
  251. $Log$
  252. Revision 1.4 2005-03-16 22:26:12 florian
  253. + ansi<->wide implemented using iconv
  254. Revision 1.3 2005/02/14 17:13:31 peter
  255. * truncate log
  256. Revision 1.2 2005/02/03 18:40:57 florian
  257. + infrastructure for WideCompareText implemented
  258. Revision 1.1 2005/02/01 20:22:50 florian
  259. * improved widestring infrastructure manager
  260. }