ccharset.pas 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2000 by Florian Klaempfl
  4. member of the Free Pascal development team.
  5. This unit implements several classes for charset conversions
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. { this unit is included temporarily for 2.2 bootstrapping and can be
  13. removed after the next release after 2.2.2 }
  14. {$mode objfpc}
  15. unit ccharset;
  16. interface
  17. type
  18. tunicodechar = word;
  19. tunicodestring = ^tunicodechar;
  20. tcsconvert = class
  21. // !!!!!!1constructor create;
  22. end;
  23. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  24. umf_unused);
  25. punicodecharmapping = ^tunicodecharmapping;
  26. tunicodecharmapping = record
  27. unicode : tunicodechar;
  28. flag : tunicodecharmappingflag;
  29. reserved : byte;
  30. end;
  31. punicodemap = ^tunicodemap;
  32. tunicodemap = record
  33. cpname : string[20];
  34. cp : word;
  35. map : punicodecharmapping;
  36. lastchar : longint;
  37. next : punicodemap;
  38. internalmap : boolean;
  39. end;
  40. tcp2unicode = class(tcsconvert)
  41. end;
  42. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  43. procedure registermapping(p : punicodemap);
  44. function getmap(const s : string) : punicodemap;
  45. function getmap(cp : word) : punicodemap;
  46. function mappingavailable(const s : string) : boolean;
  47. function getunicode(c : char;p : punicodemap) : tunicodechar;
  48. function getascii(c : tunicodechar;p : punicodemap) : string;
  49. implementation
  50. var
  51. mappings : punicodemap;
  52. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  53. var
  54. data : punicodecharmapping;
  55. datasize : longint;
  56. t : text;
  57. s,hs : string;
  58. scanpos,charpos,unicodevalue : longint;
  59. code : word;
  60. flag : tunicodecharmappingflag;
  61. p : punicodemap;
  62. lastchar : longint;
  63. begin
  64. lastchar:=-1;
  65. loadunicodemapping:=nil;
  66. datasize:=256;
  67. getmem(data,sizeof(tunicodecharmapping)*datasize);
  68. assign(t,f);
  69. {$I-}
  70. reset(t);
  71. {$I+}
  72. if ioresult<>0 then
  73. begin
  74. freemem(data,sizeof(tunicodecharmapping)*datasize);
  75. exit;
  76. end;
  77. while not(eof(t)) do
  78. begin
  79. readln(t,s);
  80. if (s[1]='0') and (s[2]='x') then
  81. begin
  82. flag:=umf_unused;
  83. scanpos:=3;
  84. hs:='$';
  85. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  86. begin
  87. hs:=hs+s[scanpos];
  88. inc(scanpos);
  89. end;
  90. val(hs,charpos,code);
  91. if code<>0 then
  92. begin
  93. freemem(data,sizeof(tunicodecharmapping)*datasize);
  94. close(t);
  95. exit;
  96. end;
  97. while not(s[scanpos] in ['0','#']) do
  98. inc(scanpos);
  99. if s[scanpos]='#' then
  100. begin
  101. { special char }
  102. unicodevalue:=$ffff;
  103. hs:=copy(s,scanpos,length(s)-scanpos+1);
  104. if hs='#DBCS LEAD BYTE' then
  105. flag:=umf_leadbyte;
  106. end
  107. else
  108. begin
  109. { C hex prefix }
  110. inc(scanpos,2);
  111. hs:='$';
  112. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  113. begin
  114. hs:=hs+s[scanpos];
  115. inc(scanpos);
  116. end;
  117. val(hs,unicodevalue,code);
  118. if code<>0 then
  119. begin
  120. freemem(data,sizeof(tunicodecharmapping)*datasize);
  121. close(t);
  122. exit;
  123. end;
  124. if charpos>datasize then
  125. begin
  126. { allocate 1024 bytes more because }
  127. { if we need more than 256 entries it's }
  128. { probably a mbcs with a lot of }
  129. { entries }
  130. datasize:=charpos+1024;
  131. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  132. end;
  133. flag:=umf_noinfo;
  134. end;
  135. data[charpos].flag:=flag;
  136. data[charpos].unicode:=unicodevalue;
  137. if charpos>lastchar then
  138. lastchar:=charpos;
  139. end;
  140. end;
  141. close(t);
  142. new(p);
  143. p^.lastchar:=lastchar;
  144. p^.cpname:=cpname;
  145. p^.cp:=cp;
  146. p^.internalmap:=false;
  147. p^.next:=nil;
  148. p^.map:=data;
  149. loadunicodemapping:=p;
  150. end;
  151. procedure registermapping(p : punicodemap);
  152. begin
  153. p^.next:=mappings;
  154. mappings:=p;
  155. end;
  156. function getmap(const s : string) : punicodemap;
  157. var
  158. hp : punicodemap;
  159. const
  160. mapcache : string = '';
  161. mapcachep : punicodemap = nil;
  162. begin
  163. if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
  164. begin
  165. getmap:=mapcachep;
  166. exit;
  167. end;
  168. hp:=mappings;
  169. while assigned(hp) do
  170. begin
  171. if hp^.cpname=s then
  172. begin
  173. getmap:=hp;
  174. mapcache:=s;
  175. mapcachep:=hp;
  176. exit;
  177. end;
  178. hp:=hp^.next;
  179. end;
  180. getmap:=nil;
  181. end;
  182. function getmap(cp : word) : punicodemap;
  183. var
  184. hp : punicodemap;
  185. const
  186. mapcache : word = 0;
  187. mapcachep : punicodemap = nil;
  188. begin
  189. if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
  190. begin
  191. getmap:=mapcachep;
  192. exit;
  193. end;
  194. hp:=mappings;
  195. while assigned(hp) do
  196. begin
  197. if hp^.cp=cp then
  198. begin
  199. getmap:=hp;
  200. mapcache:=cp;
  201. mapcachep:=hp;
  202. exit;
  203. end;
  204. hp:=hp^.next;
  205. end;
  206. getmap:=nil;
  207. end;
  208. function mappingavailable(const s : string) : boolean;
  209. begin
  210. mappingavailable:=getmap(s)<>nil;
  211. end;
  212. function getunicode(c : char;p : punicodemap) : tunicodechar;
  213. begin
  214. if ord(c)<=p^.lastchar then
  215. getunicode:=p^.map[ord(c)].unicode
  216. else
  217. getunicode:=0;
  218. end;
  219. function getascii(c : tunicodechar;p : punicodemap) : string;
  220. var
  221. i : longint;
  222. begin
  223. { at least map to space }
  224. getascii:=#32;
  225. for i:=0 to p^.lastchar do
  226. if p^.map[i].unicode=c then
  227. begin
  228. if i<256 then
  229. getascii:=chr(i)
  230. else
  231. getascii:=chr(i div 256)+chr(i mod 256);
  232. exit;
  233. end;
  234. end;
  235. var
  236. hp : punicodemap;
  237. initialization
  238. mappings:=nil;
  239. finalization
  240. while assigned(mappings) do
  241. begin
  242. hp:=mappings^.next;
  243. if not(mappings^.internalmap) then
  244. begin
  245. freemem(mappings^.map);
  246. dispose(mappings);
  247. end;
  248. mappings:=hp;
  249. end;
  250. end.