ccharset.pas 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2000 by Florian Klaempfl
  4. member of the Free Pascal development team.
  5. This unit implements several classes for charset conversions
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit ccharset;
  14. interface
  15. type
  16. tunicodechar = word;
  17. tunicodestring = ^tunicodechar;
  18. tcsconvert = class
  19. // !!!!!!1constructor create;
  20. end;
  21. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  22. umf_unused);
  23. punicodecharmapping = ^tunicodecharmapping;
  24. tunicodecharmapping = record
  25. unicode : tunicodechar;
  26. flag : tunicodecharmappingflag;
  27. reserved : byte;
  28. end;
  29. punicodemap = ^tunicodemap;
  30. tunicodemap = record
  31. cpname : string[20];
  32. cp : word;
  33. map : punicodecharmapping;
  34. lastchar : longint;
  35. next : punicodemap;
  36. internalmap : boolean;
  37. end;
  38. tcp2unicode = class(tcsconvert)
  39. end;
  40. const
  41. DefaultSystemCodePage = 437;
  42. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  43. procedure registermapping(p : punicodemap);
  44. function getmap(const s : string) : punicodemap;
  45. function getmap(cp : word) : punicodemap;
  46. function mappingavailable(const s : string) : boolean;
  47. function mappingavailable(cp :word) : boolean;
  48. function getunicode(c : char;p : punicodemap) : tunicodechar;
  49. function getascii(c : tunicodechar;p : punicodemap) : string;
  50. implementation
  51. var
  52. mappings : punicodemap;
  53. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  54. var
  55. data : punicodecharmapping;
  56. datasize : longint;
  57. t : text;
  58. s,hs : string;
  59. scanpos,charpos,unicodevalue : longint;
  60. code : word;
  61. flag : tunicodecharmappingflag;
  62. p : punicodemap;
  63. lastchar : longint;
  64. begin
  65. lastchar:=-1;
  66. loadunicodemapping:=nil;
  67. datasize:=256;
  68. getmem(data,sizeof(tunicodecharmapping)*datasize);
  69. assign(t,f);
  70. {$I-}
  71. reset(t);
  72. {$I+}
  73. if ioresult<>0 then
  74. begin
  75. freemem(data,sizeof(tunicodecharmapping)*datasize);
  76. exit;
  77. end;
  78. while not(eof(t)) do
  79. begin
  80. readln(t,s);
  81. if (s[1]='0') and (s[2]='x') then
  82. begin
  83. flag:=umf_unused;
  84. scanpos:=3;
  85. hs:='$';
  86. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  87. begin
  88. hs:=hs+s[scanpos];
  89. inc(scanpos);
  90. end;
  91. val(hs,charpos,code);
  92. if code<>0 then
  93. begin
  94. freemem(data,sizeof(tunicodecharmapping)*datasize);
  95. close(t);
  96. exit;
  97. end;
  98. while not(s[scanpos] in ['0','#']) do
  99. inc(scanpos);
  100. if s[scanpos]='#' then
  101. begin
  102. { special char }
  103. unicodevalue:=$ffff;
  104. hs:=copy(s,scanpos,length(s)-scanpos+1);
  105. if hs='#DBCS LEAD BYTE' then
  106. flag:=umf_leadbyte;
  107. end
  108. else
  109. begin
  110. { C hex prefix }
  111. inc(scanpos,2);
  112. hs:='$';
  113. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  114. begin
  115. hs:=hs+s[scanpos];
  116. inc(scanpos);
  117. end;
  118. val(hs,unicodevalue,code);
  119. if code<>0 then
  120. begin
  121. freemem(data,sizeof(tunicodecharmapping)*datasize);
  122. close(t);
  123. exit;
  124. end;
  125. if charpos>datasize then
  126. begin
  127. { allocate 1024 bytes more because }
  128. { if we need more than 256 entries it's }
  129. { probably a mbcs with a lot of }
  130. { entries }
  131. datasize:=charpos+1024;
  132. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  133. end;
  134. flag:=umf_noinfo;
  135. end;
  136. data[charpos].flag:=flag;
  137. data[charpos].unicode:=unicodevalue;
  138. if charpos>lastchar then
  139. lastchar:=charpos;
  140. end;
  141. end;
  142. close(t);
  143. new(p);
  144. p^.lastchar:=lastchar;
  145. p^.cpname:=cpname;
  146. p^.cp:=cp;
  147. p^.internalmap:=false;
  148. p^.next:=nil;
  149. p^.map:=data;
  150. loadunicodemapping:=p;
  151. end;
  152. procedure registermapping(p : punicodemap);
  153. begin
  154. p^.next:=mappings;
  155. mappings:=p;
  156. end;
  157. function getmap(const s : string) : punicodemap;
  158. var
  159. hp : punicodemap;
  160. const
  161. mapcache : string = '';
  162. mapcachep : punicodemap = nil;
  163. begin
  164. if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
  165. begin
  166. getmap:=mapcachep;
  167. exit;
  168. end;
  169. hp:=mappings;
  170. while assigned(hp) do
  171. begin
  172. if hp^.cpname=s then
  173. begin
  174. getmap:=hp;
  175. mapcache:=s;
  176. mapcachep:=hp;
  177. exit;
  178. end;
  179. hp:=hp^.next;
  180. end;
  181. getmap:=nil;
  182. end;////////
  183. function getmap(cp : word) : punicodemap;
  184. var
  185. hp : punicodemap;
  186. const
  187. mapcache : word = 0;
  188. mapcachep : punicodemap = nil;
  189. begin
  190. if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
  191. begin
  192. getmap:=mapcachep;
  193. exit;
  194. end;
  195. hp:=mappings;
  196. while assigned(hp) do
  197. begin
  198. if hp^.cp=cp then
  199. begin
  200. getmap:=hp;
  201. mapcache:=cp;
  202. mapcachep:=hp;
  203. exit;
  204. end;
  205. hp:=hp^.next;
  206. end;
  207. getmap:=nil;
  208. end;
  209. function mappingavailable(const s : string) : boolean;
  210. begin
  211. mappingavailable:=getmap(s)<>nil;
  212. end;
  213. function mappingavailable(cp : word) : boolean;
  214. begin
  215. mappingavailable:=getmap(cp)<>nil;
  216. end;
  217. function getunicode(c : char;p : punicodemap) : tunicodechar;
  218. begin
  219. if ord(c)<=p^.lastchar then
  220. getunicode:=p^.map[ord(c)].unicode
  221. else
  222. getunicode:=0;
  223. end;
  224. function getascii(c : tunicodechar;p : punicodemap) : string;
  225. var
  226. i : longint;
  227. begin
  228. { at least map to '?' }
  229. getascii:=#63;
  230. for i:=0 to p^.lastchar do
  231. if p^.map[i].unicode=c then
  232. begin
  233. if i<256 then
  234. getascii:=chr(i)
  235. else
  236. getascii:=chr(i div 256)+chr(i mod 256);
  237. exit;
  238. end;
  239. end;
  240. var
  241. hp : punicodemap;
  242. initialization
  243. mappings:=nil;
  244. finalization
  245. while assigned(mappings) do
  246. begin
  247. hp:=mappings^.next;
  248. if not(mappings^.internalmap) then
  249. begin
  250. freemem(mappings^.map);
  251. dispose(mappings);
  252. end;
  253. mappings:=hp;
  254. end;
  255. end.