ccharset.pas 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2000 by Florian Klaempfl
  4. member of the Free Pascal development team.
  5. This unit implements several classes for charset conversions
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. { this unit is included temporarily for 2.2 bootstrapping and can be
  13. removed after the next release after 2.2.2 }
  14. {$mode objfpc}
  15. unit ccharset;
  16. interface
  17. type
  18. tunicodechar = word;
  19. tunicodestring = ^tunicodechar;
  20. tcsconvert = class
  21. // !!!!!!1constructor create;
  22. end;
  23. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  24. umf_unused);
  25. punicodecharmapping = ^tunicodecharmapping;
  26. tunicodecharmapping = record
  27. unicode : tunicodechar;
  28. flag : tunicodecharmappingflag;
  29. reserved : byte;
  30. end;
  31. punicodemap = ^tunicodemap;
  32. tunicodemap = record
  33. cpname : string[20];
  34. map : punicodecharmapping;
  35. lastchar : longint;
  36. next : punicodemap;
  37. internalmap : boolean;
  38. end;
  39. tcp2unicode = class(tcsconvert)
  40. end;
  41. function loadunicodemapping(const cpname,f : string) : punicodemap;
  42. procedure registermapping(p : punicodemap);
  43. function getmap(const s : string) : punicodemap;
  44. function mappingavailable(const s : string) : boolean;
  45. function getunicode(c : char;p : punicodemap) : tunicodechar;
  46. function getascii(c : tunicodechar;p : punicodemap) : string;
  47. implementation
  48. var
  49. mappings : punicodemap;
  50. function loadunicodemapping(const cpname,f : string) : punicodemap;
  51. var
  52. data : punicodecharmapping;
  53. datasize : longint;
  54. t : text;
  55. s,hs : string;
  56. scanpos,charpos,unicodevalue : longint;
  57. code : word;
  58. flag : tunicodecharmappingflag;
  59. p : punicodemap;
  60. lastchar : longint;
  61. begin
  62. lastchar:=-1;
  63. loadunicodemapping:=nil;
  64. datasize:=256;
  65. getmem(data,sizeof(tunicodecharmapping)*datasize);
  66. assign(t,f);
  67. {$I-}
  68. reset(t);
  69. {$I+}
  70. if ioresult<>0 then
  71. begin
  72. freemem(data,sizeof(tunicodecharmapping)*datasize);
  73. exit;
  74. end;
  75. while not(eof(t)) do
  76. begin
  77. readln(t,s);
  78. if (s[1]='0') and (s[2]='x') then
  79. begin
  80. flag:=umf_unused;
  81. scanpos:=3;
  82. hs:='$';
  83. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  84. begin
  85. hs:=hs+s[scanpos];
  86. inc(scanpos);
  87. end;
  88. val(hs,charpos,code);
  89. if code<>0 then
  90. begin
  91. freemem(data,sizeof(tunicodecharmapping)*datasize);
  92. close(t);
  93. exit;
  94. end;
  95. while not(s[scanpos] in ['0','#']) do
  96. inc(scanpos);
  97. if s[scanpos]='#' then
  98. begin
  99. { special char }
  100. unicodevalue:=$ffff;
  101. hs:=copy(s,scanpos,length(s)-scanpos+1);
  102. if hs='#DBCS LEAD BYTE' then
  103. flag:=umf_leadbyte;
  104. end
  105. else
  106. begin
  107. { C hex prefix }
  108. inc(scanpos,2);
  109. hs:='$';
  110. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  111. begin
  112. hs:=hs+s[scanpos];
  113. inc(scanpos);
  114. end;
  115. val(hs,unicodevalue,code);
  116. if code<>0 then
  117. begin
  118. freemem(data,sizeof(tunicodecharmapping)*datasize);
  119. close(t);
  120. exit;
  121. end;
  122. if charpos>datasize then
  123. begin
  124. { allocate 1024 bytes more because }
  125. { if we need more than 256 entries it's }
  126. { probably a mbcs with a lot of }
  127. { entries }
  128. datasize:=charpos+1024;
  129. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  130. end;
  131. flag:=umf_noinfo;
  132. end;
  133. data[charpos].flag:=flag;
  134. data[charpos].unicode:=unicodevalue;
  135. if charpos>lastchar then
  136. lastchar:=charpos;
  137. end;
  138. end;
  139. close(t);
  140. new(p);
  141. p^.lastchar:=lastchar;
  142. p^.cpname:=cpname;
  143. p^.internalmap:=false;
  144. p^.next:=nil;
  145. p^.map:=data;
  146. loadunicodemapping:=p;
  147. end;
  148. procedure registermapping(p : punicodemap);
  149. begin
  150. p^.next:=mappings;
  151. mappings:=p;
  152. end;
  153. function getmap(const s : string) : punicodemap;
  154. var
  155. hp : punicodemap;
  156. const
  157. mapcache : string = '';
  158. mapcachep : punicodemap = nil;
  159. begin
  160. if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
  161. begin
  162. getmap:=mapcachep;
  163. exit;
  164. end;
  165. hp:=mappings;
  166. while assigned(hp) do
  167. begin
  168. if hp^.cpname=s then
  169. begin
  170. getmap:=hp;
  171. mapcache:=s;
  172. mapcachep:=hp;
  173. exit;
  174. end;
  175. hp:=hp^.next;
  176. end;
  177. getmap:=nil;
  178. end;
  179. function mappingavailable(const s : string) : boolean;
  180. begin
  181. mappingavailable:=getmap(s)<>nil;
  182. end;
  183. function getunicode(c : char;p : punicodemap) : tunicodechar;
  184. begin
  185. if ord(c)<=p^.lastchar then
  186. getunicode:=p^.map[ord(c)].unicode
  187. else
  188. getunicode:=0;
  189. end;
  190. function getascii(c : tunicodechar;p : punicodemap) : string;
  191. var
  192. i : longint;
  193. begin
  194. { at least map to space }
  195. getascii:=#32;
  196. for i:=0 to p^.lastchar do
  197. if p^.map[i].unicode=c then
  198. begin
  199. if i<256 then
  200. getascii:=chr(i)
  201. else
  202. getascii:=chr(i div 256)+chr(i mod 256);
  203. exit;
  204. end;
  205. end;
  206. var
  207. hp : punicodemap;
  208. initialization
  209. mappings:=nil;
  210. finalization
  211. while assigned(mappings) do
  212. begin
  213. hp:=mappings^.next;
  214. if not(mappings^.internalmap) then
  215. begin
  216. freemem(mappings^.map);
  217. dispose(mappings);
  218. end;
  219. mappings:=hp;
  220. end;
  221. end.