charset.pp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2000 by Florian Klaempfl
  4. member of the Free Pascal development team.
  5. This unit implements several classes for charset conversions
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit charset;
  14. interface
  15. type
  16. tunicodechar = word;
  17. tunicodestring = ^tunicodechar;
  18. tcsconvert = class
  19. // !!!!!!1constructor create;
  20. end;
  21. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  22. umf_unused);
  23. punicodecharmapping = ^tunicodecharmapping;
  24. tunicodecharmapping = record
  25. unicode : tunicodechar;
  26. flag : tunicodecharmappingflag;
  27. reserved : byte;
  28. end;
  29. punicodemap = ^tunicodemap;
  30. tunicodemap = record
  31. cpname : string[20];
  32. cp : word;
  33. map : punicodecharmapping;
  34. lastchar : longint;
  35. next : punicodemap;
  36. internalmap : boolean;
  37. end;
  38. tcp2unicode = class(tcsconvert)
  39. end;
  40. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  41. procedure registermapping(p : punicodemap);
  42. function getmap(const s : string) : punicodemap;
  43. function getmap(cp : word) : punicodemap;
  44. function mappingavailable(const s : string) : boolean;
  45. function mappingavailable(cp :word) : boolean;
  46. function getunicode(c : char;p : punicodemap) : tunicodechar;
  47. function getascii(c : tunicodechar;p : punicodemap) : string;
  48. implementation
  49. var
  50. mappings : punicodemap;
  51. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  52. var
  53. data : punicodecharmapping;
  54. datasize : longint;
  55. t : text;
  56. s,hs : string;
  57. scanpos,charpos,unicodevalue : longint;
  58. code : word;
  59. flag : tunicodecharmappingflag;
  60. p : punicodemap;
  61. lastchar : longint;
  62. begin
  63. lastchar:=-1;
  64. loadunicodemapping:=nil;
  65. datasize:=256;
  66. getmem(data,sizeof(tunicodecharmapping)*datasize);
  67. assign(t,f);
  68. {$I-}
  69. reset(t);
  70. {$I+}
  71. if ioresult<>0 then
  72. begin
  73. freemem(data,sizeof(tunicodecharmapping)*datasize);
  74. exit;
  75. end;
  76. while not(eof(t)) do
  77. begin
  78. readln(t,s);
  79. if (s[1]='0') and (s[2]='x') then
  80. begin
  81. flag:=umf_unused;
  82. scanpos:=3;
  83. hs:='$';
  84. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  85. begin
  86. hs:=hs+s[scanpos];
  87. inc(scanpos);
  88. end;
  89. val(hs,charpos,code);
  90. if code<>0 then
  91. begin
  92. freemem(data,sizeof(tunicodecharmapping)*datasize);
  93. close(t);
  94. exit;
  95. end;
  96. while not(s[scanpos] in ['0','#']) do
  97. inc(scanpos);
  98. if s[scanpos]='#' then
  99. begin
  100. { special char }
  101. unicodevalue:=$ffff;
  102. hs:=copy(s,scanpos,length(s)-scanpos+1);
  103. if hs='#DBCS LEAD BYTE' then
  104. flag:=umf_leadbyte;
  105. end
  106. else
  107. begin
  108. { C hex prefix }
  109. inc(scanpos,2);
  110. hs:='$';
  111. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  112. begin
  113. hs:=hs+s[scanpos];
  114. inc(scanpos);
  115. end;
  116. val(hs,unicodevalue,code);
  117. if code<>0 then
  118. begin
  119. freemem(data,sizeof(tunicodecharmapping)*datasize);
  120. close(t);
  121. exit;
  122. end;
  123. if charpos>datasize then
  124. begin
  125. { allocate 1024 bytes more because }
  126. { if we need more than 256 entries it's }
  127. { probably a mbcs with a lot of }
  128. { entries }
  129. datasize:=charpos+1024;
  130. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  131. end;
  132. flag:=umf_noinfo;
  133. end;
  134. data[charpos].flag:=flag;
  135. data[charpos].unicode:=unicodevalue;
  136. if charpos>lastchar then
  137. lastchar:=charpos;
  138. end;
  139. end;
  140. close(t);
  141. new(p);
  142. p^.lastchar:=lastchar;
  143. p^.cpname:=cpname;
  144. p^.cp:=cp;
  145. p^.internalmap:=false;
  146. p^.next:=nil;
  147. p^.map:=data;
  148. loadunicodemapping:=p;
  149. end;
  150. procedure registermapping(p : punicodemap);
  151. begin
  152. p^.next:=mappings;
  153. mappings:=p;
  154. end;
  155. function getmap(const s : string) : punicodemap;
  156. var
  157. hp : punicodemap;
  158. const
  159. mapcache : string = '';
  160. mapcachep : punicodemap = nil;
  161. begin
  162. if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
  163. begin
  164. getmap:=mapcachep;
  165. exit;
  166. end;
  167. hp:=mappings;
  168. while assigned(hp) do
  169. begin
  170. if hp^.cpname=s then
  171. begin
  172. getmap:=hp;
  173. mapcache:=s;
  174. mapcachep:=hp;
  175. exit;
  176. end;
  177. hp:=hp^.next;
  178. end;
  179. getmap:=nil;
  180. end;////////
  181. function getmap(cp : word) : punicodemap;
  182. var
  183. hp : punicodemap;
  184. const
  185. mapcache : word = 0;
  186. mapcachep : punicodemap = nil;
  187. begin
  188. if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
  189. begin
  190. getmap:=mapcachep;
  191. exit;
  192. end;
  193. hp:=mappings;
  194. while assigned(hp) do
  195. begin
  196. if hp^.cp=cp then
  197. begin
  198. getmap:=hp;
  199. mapcache:=cp;
  200. mapcachep:=hp;
  201. exit;
  202. end;
  203. hp:=hp^.next;
  204. end;
  205. getmap:=nil;
  206. end;
  207. function mappingavailable(const s : string) : boolean;
  208. begin
  209. mappingavailable:=getmap(s)<>nil;
  210. end;
  211. function mappingavailable(cp : word) : boolean;
  212. begin
  213. mappingavailable:=getmap(cp)<>nil;
  214. end;
  215. function getunicode(c : char;p : punicodemap) : tunicodechar;
  216. begin
  217. if ord(c)<=p^.lastchar then
  218. getunicode:=p^.map[ord(c)].unicode
  219. else
  220. getunicode:=0;
  221. end;
  222. function getascii(c : tunicodechar;p : punicodemap) : string;
  223. var
  224. i : longint;
  225. begin
  226. { at least map to '?' }
  227. getascii:=#63;
  228. for i:=0 to p^.lastchar do
  229. if p^.map[i].unicode=c then
  230. begin
  231. if i<256 then
  232. getascii:=chr(i)
  233. else
  234. getascii:=chr(i div 256)+chr(i mod 256);
  235. exit;
  236. end;
  237. end;
  238. var
  239. hp : punicodemap;
  240. initialization
  241. mappings:=nil;
  242. finalization
  243. while assigned(mappings) do
  244. begin
  245. hp:=mappings^.next;
  246. if not(mappings^.internalmap) then
  247. begin
  248. freemem(mappings^.map);
  249. dispose(mappings);
  250. end;
  251. mappings:=hp;
  252. end;
  253. end.