charset.pas 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000 by Florian Klaempfl
  5. member of the Free Pascal development team.
  6. This unit implements several classes for charset conversions
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$mode objfpc}
  14. unit charset;
  15. interface
  16. type
  17. tunicodechar = word;
  18. tunicodestring = ^tunicodechar;
  19. tcsconvert = class
  20. // !!!!!!1constructor create;
  21. end;
  22. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  23. umf_unused);
  24. punicodecharmapping = ^tunicodecharmapping;
  25. tunicodecharmapping = record
  26. unicode : tunicodechar;
  27. flag : tunicodecharmappingflag;
  28. reserved : byte;
  29. end;
  30. punicodemap = ^tunicodemap;
  31. tunicodemap = record
  32. cpname : string[20];
  33. map : punicodecharmapping;
  34. lastchar : longint;
  35. next : punicodemap;
  36. internalmap : boolean;
  37. end;
  38. tcp2unicode = class(tcsconvert)
  39. end;
  40. function loadunicodemapping(const cpname,f : string) : punicodemap;
  41. procedure registermapping(p : punicodemap);
  42. function getmap(const s : string) : punicodemap;
  43. function mappingavailable(const s : string) : boolean;
  44. function getunicode(c : char;p : punicodemap) : tunicodechar;
  45. function getascii(c : tunicodechar;p : punicodemap) : string;
  46. implementation
  47. var
  48. mappings : punicodemap;
  49. function loadunicodemapping(const cpname,f : string) : punicodemap;
  50. var
  51. data : punicodecharmapping;
  52. datasize : longint;
  53. t : text;
  54. s,hs : string;
  55. scanpos,charpos,unicodevalue : longint;
  56. code : word;
  57. flag : tunicodecharmappingflag;
  58. p : punicodemap;
  59. lastchar : longint;
  60. begin
  61. lastchar:=-1;
  62. loadunicodemapping:=nil;
  63. datasize:=256;
  64. getmem(data,sizeof(tunicodecharmapping)*datasize);
  65. assign(t,f);
  66. {$I-}
  67. reset(t);
  68. {$I+}
  69. if ioresult<>0 then
  70. begin
  71. freemem(data,sizeof(tunicodecharmapping)*datasize);
  72. exit;
  73. end;
  74. while not(eof(t)) do
  75. begin
  76. readln(t,s);
  77. if (s[1]='0') and (s[2]='x') then
  78. begin
  79. flag:=umf_unused;
  80. scanpos:=3;
  81. hs:='$';
  82. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  83. begin
  84. hs:=hs+s[scanpos];
  85. inc(scanpos);
  86. end;
  87. val(hs,charpos,code);
  88. if code<>0 then
  89. begin
  90. freemem(data,sizeof(tunicodecharmapping)*datasize);
  91. close(t);
  92. exit;
  93. end;
  94. while not(s[scanpos] in ['0','#']) do
  95. inc(scanpos);
  96. if s[scanpos]='#' then
  97. begin
  98. { special char }
  99. unicodevalue:=$ffff;
  100. hs:=copy(s,scanpos,length(s)-scanpos+1);
  101. if hs='#DBCS LEAD BYTE' then
  102. flag:=umf_leadbyte;
  103. end
  104. else
  105. begin
  106. { C hex prefix }
  107. inc(scanpos,2);
  108. hs:='$';
  109. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  110. begin
  111. hs:=hs+s[scanpos];
  112. inc(scanpos);
  113. end;
  114. val(hs,unicodevalue,code);
  115. if code<>0 then
  116. begin
  117. freemem(data,sizeof(tunicodecharmapping)*datasize);
  118. close(t);
  119. exit;
  120. end;
  121. if charpos>datasize then
  122. begin
  123. { allocate 1024 bytes more because }
  124. { if we need more than 256 entries it's }
  125. { probably a mbcs with a lot of }
  126. { entries }
  127. datasize:=charpos+1024;
  128. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  129. end;
  130. flag:=umf_noinfo;
  131. end;
  132. data[charpos].flag:=flag;
  133. data[charpos].unicode:=unicodevalue;
  134. if charpos>lastchar then
  135. lastchar:=charpos;
  136. end;
  137. end;
  138. close(t);
  139. new(p);
  140. p^.lastchar:=lastchar;
  141. p^.cpname:=cpname;
  142. p^.internalmap:=false;
  143. p^.next:=nil;
  144. p^.map:=data;
  145. loadunicodemapping:=p;
  146. end;
  147. procedure registermapping(p : punicodemap);
  148. begin
  149. p^.next:=mappings;
  150. mappings:=p;
  151. end;
  152. function getmap(const s : string) : punicodemap;
  153. var
  154. hp : punicodemap;
  155. const
  156. mapcache : string = '';
  157. mapcachep : punicodemap = nil;
  158. begin
  159. if (mapcache=s) and (mapcachep^.cpname=s) then
  160. begin
  161. getmap:=mapcachep;
  162. exit;
  163. end;
  164. hp:=mappings;
  165. while assigned(hp) do
  166. begin
  167. if hp^.cpname=s then
  168. begin
  169. getmap:=hp;
  170. mapcache:=s;
  171. mapcachep:=hp;
  172. exit;
  173. end;
  174. hp:=hp^.next;
  175. end;
  176. getmap:=nil;
  177. end;
  178. function mappingavailable(const s : string) : boolean;
  179. begin
  180. mappingavailable:=getmap(s)<>nil;
  181. end;
  182. function getunicode(c : char;p : punicodemap) : tunicodechar;
  183. begin
  184. if ord(c)<=p^.lastchar then
  185. getunicode:=p^.map[ord(c)].unicode
  186. else
  187. getunicode:=0;
  188. end;
  189. function getascii(c : tunicodechar;p : punicodemap) : string;
  190. var
  191. i : longint;
  192. begin
  193. { at least map to space }
  194. getascii:=#32;
  195. for i:=0 to p^.lastchar do
  196. if p^.map[i].unicode=c then
  197. begin
  198. if i<256 then
  199. getascii:=chr(i)
  200. else
  201. getascii:=chr(i div 256)+chr(i mod 256);
  202. exit;
  203. end;
  204. end;
  205. var
  206. hp : punicodemap;
  207. initialization
  208. mappings:=nil;
  209. finalization
  210. while assigned(mappings) do
  211. begin
  212. hp:=mappings^.next;
  213. if not(mappings^.internalmap) then
  214. begin
  215. freemem(mappings^.map);
  216. dispose(mappings);
  217. end;
  218. mappings:=hp;
  219. end;
  220. end.
  221. {
  222. $Log$
  223. Revision 1.1 2002-07-20 17:11:48 florian
  224. + source code page support
  225. Revision 1.2 2000/10/21 18:20:17 florian
  226. * a lot of small changes:
  227. - setlength is internal
  228. - win32 graph unit extended
  229. ....
  230. Revision 1.1 2000/08/17 07:29:39 florian
  231. + initial revision
  232. }