charset.pas 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000 by Florian Klaempfl
  5. member of the Free Pascal development team.
  6. This unit implements several classes for charset conversions
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************
  13. }
  14. unit charset;
  15. {$i fpcdefs.inc}
  16. interface
  17. type
  18. tunicodechar = word;
  19. tunicodestring = ^tunicodechar;
  20. tcsconvert = class
  21. // !!!!!!1constructor create;
  22. end;
  23. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  24. umf_unused);
  25. punicodecharmapping = ^tunicodecharmapping;
  26. tunicodecharmapping = record
  27. unicode : tunicodechar;
  28. flag : tunicodecharmappingflag;
  29. reserved : byte;
  30. end;
  31. punicodemap = ^tunicodemap;
  32. tunicodemap = record
  33. cpname : string[20];
  34. map : punicodecharmapping;
  35. lastchar : longint;
  36. next : punicodemap;
  37. internalmap : boolean;
  38. end;
  39. tcp2unicode = class(tcsconvert)
  40. end;
  41. function loadunicodemapping(const cpname,f : string) : punicodemap;
  42. procedure registermapping(p : punicodemap);
  43. function getmap(const s : string) : punicodemap;
  44. function mappingavailable(const s : string) : boolean;
  45. function getunicode(c : char;p : punicodemap) : tunicodechar;
  46. function getascii(c : tunicodechar;p : punicodemap) : string;
  47. implementation
  48. var
  49. mappings : punicodemap;
  50. function loadunicodemapping(const cpname,f : string) : punicodemap;
  51. var
  52. data : punicodecharmapping;
  53. datasize : longint;
  54. t : text;
  55. s,hs : string;
  56. scanpos,charpos,unicodevalue : longint;
  57. code : integer;
  58. flag : tunicodecharmappingflag;
  59. p : punicodemap;
  60. lastchar : longint;
  61. begin
  62. lastchar:=-1;
  63. loadunicodemapping:=nil;
  64. datasize:=256;
  65. getmem(data,sizeof(tunicodecharmapping)*datasize);
  66. assign(t,f);
  67. {$I-}
  68. reset(t);
  69. {$I+}
  70. if ioresult<>0 then
  71. begin
  72. freemem(data,sizeof(tunicodecharmapping)*datasize);
  73. exit;
  74. end;
  75. while not(eof(t)) do
  76. begin
  77. readln(t,s);
  78. if (s[1]='0') and (s[2]='x') then
  79. begin
  80. flag:=umf_unused;
  81. scanpos:=3;
  82. hs:='$';
  83. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  84. begin
  85. hs:=hs+s[scanpos];
  86. inc(scanpos);
  87. end;
  88. val(hs,charpos,code);
  89. if code<>0 then
  90. begin
  91. freemem(data,sizeof(tunicodecharmapping)*datasize);
  92. close(t);
  93. exit;
  94. end;
  95. while not(s[scanpos] in ['0','#']) do
  96. inc(scanpos);
  97. if s[scanpos]='#' then
  98. begin
  99. { special char }
  100. unicodevalue:=$ffff;
  101. hs:=copy(s,scanpos,length(s)-scanpos+1);
  102. if hs='#DBCS LEAD BYTE' then
  103. flag:=umf_leadbyte;
  104. end
  105. else
  106. begin
  107. { C hex prefix }
  108. inc(scanpos,2);
  109. hs:='$';
  110. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  111. begin
  112. hs:=hs+s[scanpos];
  113. inc(scanpos);
  114. end;
  115. val(hs,unicodevalue,code);
  116. if code<>0 then
  117. begin
  118. freemem(data,sizeof(tunicodecharmapping)*datasize);
  119. close(t);
  120. exit;
  121. end;
  122. if charpos>datasize then
  123. begin
  124. { allocate 1024 bytes more because }
  125. { if we need more than 256 entries it's }
  126. { probably a mbcs with a lot of }
  127. { entries }
  128. datasize:=charpos+1024;
  129. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  130. end;
  131. flag:=umf_noinfo;
  132. end;
  133. {$ifdef delphi}
  134. data^.flag:=flag;
  135. data^.unicode:=unicodevalue;
  136. {$else}
  137. data[charpos].flag:=flag;
  138. data[charpos].unicode:=unicodevalue;
  139. {$endif delphi}
  140. if charpos>lastchar then
  141. lastchar:=charpos;
  142. end;
  143. end;
  144. close(t);
  145. new(p);
  146. p^.lastchar:=lastchar;
  147. p^.cpname:=cpname;
  148. p^.internalmap:=false;
  149. p^.next:=nil;
  150. p^.map:=data;
  151. loadunicodemapping:=p;
  152. end;
  153. procedure registermapping(p : punicodemap);
  154. begin
  155. p^.next:=mappings;
  156. mappings:=p;
  157. end;
  158. function getmap(const s : string) : punicodemap;
  159. var
  160. hp : punicodemap;
  161. const
  162. mapcache : string = '';
  163. mapcachep : punicodemap = nil;
  164. begin
  165. if (mapcache=s) and (mapcachep^.cpname=s) then
  166. begin
  167. getmap:=mapcachep;
  168. exit;
  169. end;
  170. hp:=mappings;
  171. while assigned(hp) do
  172. begin
  173. if hp^.cpname=s then
  174. begin
  175. getmap:=hp;
  176. mapcache:=s;
  177. mapcachep:=hp;
  178. exit;
  179. end;
  180. hp:=hp^.next;
  181. end;
  182. getmap:=nil;
  183. end;
  184. function mappingavailable(const s : string) : boolean;
  185. begin
  186. mappingavailable:=getmap(s)<>nil;
  187. end;
  188. function getunicode(c : char;p : punicodemap) : tunicodechar;
  189. begin
  190. if ord(c)<=p^.lastchar then
  191. {$ifdef Delphi}
  192. getunicode:=p^.map.unicode
  193. {$else}
  194. getunicode:=p^.map[ord(c)].unicode
  195. {$endif}
  196. else
  197. getunicode:=0;
  198. end;
  199. function getascii(c : tunicodechar;p : punicodemap) : string;
  200. var
  201. i : longint;
  202. begin
  203. { at least map to space }
  204. getascii:=#32;
  205. for i:=0 to p^.lastchar do
  206. {$ifdef Delphi}
  207. if p^.map.unicode=c then
  208. {$else}
  209. if p^.map[i].unicode=c then
  210. {$endif}
  211. begin
  212. if i<256 then
  213. getascii:=chr(i)
  214. else
  215. getascii:=chr(i div 256)+chr(i mod 256);
  216. exit;
  217. end;
  218. end;
  219. var
  220. hp : punicodemap;
  221. initialization
  222. mappings:=nil;
  223. finalization
  224. while assigned(mappings) do
  225. begin
  226. hp:=mappings^.next;
  227. if not(mappings^.internalmap) then
  228. begin
  229. freemem(mappings^.map);
  230. dispose(mappings);
  231. end;
  232. mappings:=hp;
  233. end;
  234. end.
  235. {
  236. $Log$
  237. Revision 1.4 2003-04-22 14:33:38 peter
  238. * removed some notes/hints
  239. Revision 1.3 2002/10/05 12:43:24 carl
  240. * fixes for Delphi 6 compilation
  241. (warning : Some features do not work under Delphi)
  242. Revision 1.2 2002/09/07 15:25:02 peter
  243. * old logs removed and tabs fixed
  244. Revision 1.1 2002/07/20 17:11:48 florian
  245. + source code page support
  246. }