charset.pas 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000 by Florian Klaempfl
  5. member of the Free Pascal development team.
  6. This unit implements several classes for charset conversions
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$ifndef delphi}
  14. {$mode objfpc}
  15. {$endif}
  16. unit charset;
  17. interface
  18. type
  19. tunicodechar = word;
  20. tunicodestring = ^tunicodechar;
  21. tcsconvert = class
  22. // !!!!!!1constructor create;
  23. end;
  24. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  25. umf_unused);
  26. punicodecharmapping = ^tunicodecharmapping;
  27. tunicodecharmapping = record
  28. unicode : tunicodechar;
  29. flag : tunicodecharmappingflag;
  30. reserved : byte;
  31. end;
  32. punicodemap = ^tunicodemap;
  33. tunicodemap = record
  34. cpname : string[20];
  35. map : punicodecharmapping;
  36. lastchar : longint;
  37. next : punicodemap;
  38. internalmap : boolean;
  39. end;
  40. tcp2unicode = class(tcsconvert)
  41. end;
  42. function loadunicodemapping(const cpname,f : string) : punicodemap;
  43. procedure registermapping(p : punicodemap);
  44. function getmap(const s : string) : punicodemap;
  45. function mappingavailable(const s : string) : boolean;
  46. function getunicode(c : char;p : punicodemap) : tunicodechar;
  47. function getascii(c : tunicodechar;p : punicodemap) : string;
  48. implementation
  49. var
  50. mappings : punicodemap;
  51. function loadunicodemapping(const cpname,f : string) : punicodemap;
  52. var
  53. data : punicodecharmapping;
  54. datasize : longint;
  55. t : text;
  56. s,hs : string;
  57. scanpos,charpos,unicodevalue : longint;
  58. code : integer;
  59. flag : tunicodecharmappingflag;
  60. p : punicodemap;
  61. lastchar : longint;
  62. begin
  63. lastchar:=-1;
  64. loadunicodemapping:=nil;
  65. datasize:=256;
  66. getmem(data,sizeof(tunicodecharmapping)*datasize);
  67. assign(t,f);
  68. {$I-}
  69. reset(t);
  70. {$I+}
  71. if ioresult<>0 then
  72. begin
  73. freemem(data,sizeof(tunicodecharmapping)*datasize);
  74. exit;
  75. end;
  76. while not(eof(t)) do
  77. begin
  78. readln(t,s);
  79. if (s[1]='0') and (s[2]='x') then
  80. begin
  81. flag:=umf_unused;
  82. scanpos:=3;
  83. hs:='$';
  84. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  85. begin
  86. hs:=hs+s[scanpos];
  87. inc(scanpos);
  88. end;
  89. val(hs,charpos,code);
  90. if code<>0 then
  91. begin
  92. freemem(data,sizeof(tunicodecharmapping)*datasize);
  93. close(t);
  94. exit;
  95. end;
  96. while not(s[scanpos] in ['0','#']) do
  97. inc(scanpos);
  98. if s[scanpos]='#' then
  99. begin
  100. { special char }
  101. unicodevalue:=$ffff;
  102. hs:=copy(s,scanpos,length(s)-scanpos+1);
  103. if hs='#DBCS LEAD BYTE' then
  104. flag:=umf_leadbyte;
  105. end
  106. else
  107. begin
  108. { C hex prefix }
  109. inc(scanpos,2);
  110. hs:='$';
  111. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  112. begin
  113. hs:=hs+s[scanpos];
  114. inc(scanpos);
  115. end;
  116. val(hs,unicodevalue,code);
  117. if code<>0 then
  118. begin
  119. freemem(data,sizeof(tunicodecharmapping)*datasize);
  120. close(t);
  121. exit;
  122. end;
  123. if charpos>datasize then
  124. begin
  125. { allocate 1024 bytes more because }
  126. { if we need more than 256 entries it's }
  127. { probably a mbcs with a lot of }
  128. { entries }
  129. datasize:=charpos+1024;
  130. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  131. end;
  132. flag:=umf_noinfo;
  133. end;
  134. {$ifdef delphi}
  135. data^.flag:=flag;
  136. data^.unicode:=unicodevalue;
  137. {$else}
  138. data[charpos].flag:=flag;
  139. data[charpos].unicode:=unicodevalue;
  140. {$endif delphi}
  141. if charpos>lastchar then
  142. lastchar:=charpos;
  143. end;
  144. end;
  145. close(t);
  146. new(p);
  147. p^.lastchar:=lastchar;
  148. p^.cpname:=cpname;
  149. p^.internalmap:=false;
  150. p^.next:=nil;
  151. p^.map:=data;
  152. loadunicodemapping:=p;
  153. end;
  154. procedure registermapping(p : punicodemap);
  155. begin
  156. p^.next:=mappings;
  157. mappings:=p;
  158. end;
  159. function getmap(const s : string) : punicodemap;
  160. var
  161. hp : punicodemap;
  162. const
  163. mapcache : string = '';
  164. mapcachep : punicodemap = nil;
  165. begin
  166. if (mapcache=s) and (mapcachep^.cpname=s) then
  167. begin
  168. getmap:=mapcachep;
  169. exit;
  170. end;
  171. hp:=mappings;
  172. while assigned(hp) do
  173. begin
  174. if hp^.cpname=s then
  175. begin
  176. getmap:=hp;
  177. mapcache:=s;
  178. mapcachep:=hp;
  179. exit;
  180. end;
  181. hp:=hp^.next;
  182. end;
  183. getmap:=nil;
  184. end;
  185. function mappingavailable(const s : string) : boolean;
  186. begin
  187. mappingavailable:=getmap(s)<>nil;
  188. end;
  189. function getunicode(c : char;p : punicodemap) : tunicodechar;
  190. begin
  191. if ord(c)<=p^.lastchar then
  192. {$ifdef Delphi}
  193. getunicode:=p^.map.unicode
  194. {$else}
  195. getunicode:=p^.map[ord(c)].unicode
  196. {$endif}
  197. else
  198. getunicode:=0;
  199. end;
  200. function getascii(c : tunicodechar;p : punicodemap) : string;
  201. var
  202. i : longint;
  203. begin
  204. { at least map to space }
  205. getascii:=#32;
  206. for i:=0 to p^.lastchar do
  207. {$ifdef Delphi}
  208. if p^.map.unicode=c then
  209. {$else}
  210. if p^.map[i].unicode=c then
  211. {$endif}
  212. begin
  213. if i<256 then
  214. getascii:=chr(i)
  215. else
  216. getascii:=chr(i div 256)+chr(i mod 256);
  217. exit;
  218. end;
  219. end;
  220. var
  221. hp : punicodemap;
  222. initialization
  223. mappings:=nil;
  224. finalization
  225. while assigned(mappings) do
  226. begin
  227. hp:=mappings^.next;
  228. if not(mappings^.internalmap) then
  229. begin
  230. freemem(mappings^.map);
  231. dispose(mappings);
  232. end;
  233. mappings:=hp;
  234. end;
  235. end.
  236. {
  237. $Log$
  238. Revision 1.3 2002-10-05 12:43:24 carl
  239. * fixes for Delphi 6 compilation
  240. (warning : Some features do not work under Delphi)
  241. Revision 1.2 2002/09/07 15:25:02 peter
  242. * old logs removed and tabs fixed
  243. Revision 1.1 2002/07/20 17:11:48 florian
  244. + source code page support
  245. }