charset.pp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2000 by Florian Klaempfl
  4. member of the Free Pascal development team.
  5. This unit implements several classes for charset conversions
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit charset;
  14. interface
  15. type
  16. tunicodechar = word;
  17. tunicodestring = ^tunicodechar;
  18. tcsconvert = class
  19. // !!!!!!1constructor create;
  20. end;
  21. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  22. umf_unused);
  23. punicodecharmapping = ^tunicodecharmapping;
  24. tunicodecharmapping = record
  25. unicode : tunicodechar;
  26. flag : tunicodecharmappingflag;
  27. reserved : byte;
  28. end;
  29. punicodemap = ^tunicodemap;
  30. tunicodemap = record
  31. cpname : string[20];
  32. map : punicodecharmapping;
  33. lastchar : longint;
  34. next : punicodemap;
  35. internalmap : boolean;
  36. end;
  37. tcp2unicode = class(tcsconvert)
  38. end;
  39. function loadunicodemapping(const cpname,f : string) : punicodemap;
  40. procedure registermapping(p : punicodemap);
  41. function getmap(const s : string) : punicodemap;
  42. function mappingavailable(const s : string) : boolean;
  43. function getunicode(c : char;p : punicodemap) : tunicodechar;
  44. function getascii(c : tunicodechar;p : punicodemap) : string;
  45. implementation
  46. var
  47. mappings : punicodemap;
  48. function loadunicodemapping(const cpname,f : string) : punicodemap;
  49. var
  50. data : punicodecharmapping;
  51. datasize : longint;
  52. t : text;
  53. s,hs : string;
  54. scanpos,charpos,unicodevalue : longint;
  55. code : word;
  56. flag : tunicodecharmappingflag;
  57. p : punicodemap;
  58. lastchar : longint;
  59. begin
  60. lastchar:=-1;
  61. loadunicodemapping:=nil;
  62. datasize:=256;
  63. getmem(data,sizeof(tunicodecharmapping)*datasize);
  64. assign(t,f);
  65. {$I-}
  66. reset(t);
  67. {$I+}
  68. if ioresult<>0 then
  69. begin
  70. freemem(data,sizeof(tunicodecharmapping)*datasize);
  71. exit;
  72. end;
  73. while not(eof(t)) do
  74. begin
  75. readln(t,s);
  76. if (s[1]='0') and (s[2]='x') then
  77. begin
  78. flag:=umf_unused;
  79. scanpos:=3;
  80. hs:='$';
  81. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  82. begin
  83. hs:=hs+s[scanpos];
  84. inc(scanpos);
  85. end;
  86. val(hs,charpos,code);
  87. if code<>0 then
  88. begin
  89. freemem(data,sizeof(tunicodecharmapping)*datasize);
  90. close(t);
  91. exit;
  92. end;
  93. while not(s[scanpos] in ['0','#']) do
  94. inc(scanpos);
  95. if s[scanpos]='#' then
  96. begin
  97. { special char }
  98. unicodevalue:=$ffff;
  99. hs:=copy(s,scanpos,length(s)-scanpos+1);
  100. if hs='#DBCS LEAD BYTE' then
  101. flag:=umf_leadbyte;
  102. end
  103. else
  104. begin
  105. { C hex prefix }
  106. inc(scanpos,2);
  107. hs:='$';
  108. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  109. begin
  110. hs:=hs+s[scanpos];
  111. inc(scanpos);
  112. end;
  113. val(hs,unicodevalue,code);
  114. if code<>0 then
  115. begin
  116. freemem(data,sizeof(tunicodecharmapping)*datasize);
  117. close(t);
  118. exit;
  119. end;
  120. if charpos>datasize then
  121. begin
  122. { allocate 1024 bytes more because }
  123. { if we need more than 256 entries it's }
  124. { probably a mbcs with a lot of }
  125. { entries }
  126. datasize:=charpos+1024;
  127. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  128. end;
  129. flag:=umf_noinfo;
  130. end;
  131. data[charpos].flag:=flag;
  132. data[charpos].unicode:=unicodevalue;
  133. if charpos>lastchar then
  134. lastchar:=charpos;
  135. end;
  136. end;
  137. close(t);
  138. new(p);
  139. p^.lastchar:=lastchar;
  140. p^.cpname:=cpname;
  141. p^.internalmap:=false;
  142. p^.next:=nil;
  143. p^.map:=data;
  144. loadunicodemapping:=p;
  145. end;
  146. procedure registermapping(p : punicodemap);
  147. begin
  148. p^.next:=mappings;
  149. mappings:=p;
  150. end;
  151. function getmap(const s : string) : punicodemap;
  152. var
  153. hp : punicodemap;
  154. const
  155. mapcache : string = '';
  156. mapcachep : punicodemap = nil;
  157. begin
  158. if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
  159. begin
  160. getmap:=mapcachep;
  161. exit;
  162. end;
  163. hp:=mappings;
  164. while assigned(hp) do
  165. begin
  166. if hp^.cpname=s then
  167. begin
  168. getmap:=hp;
  169. mapcache:=s;
  170. mapcachep:=hp;
  171. exit;
  172. end;
  173. hp:=hp^.next;
  174. end;
  175. getmap:=nil;
  176. end;
  177. function mappingavailable(const s : string) : boolean;
  178. begin
  179. mappingavailable:=getmap(s)<>nil;
  180. end;
  181. function getunicode(c : char;p : punicodemap) : tunicodechar;
  182. begin
  183. if ord(c)<=p^.lastchar then
  184. getunicode:=p^.map[ord(c)].unicode
  185. else
  186. getunicode:=0;
  187. end;
  188. function getascii(c : tunicodechar;p : punicodemap) : string;
  189. var
  190. i : longint;
  191. begin
  192. { at least map to space }
  193. getascii:=#32;
  194. for i:=0 to p^.lastchar do
  195. if p^.map[i].unicode=c then
  196. begin
  197. if i<256 then
  198. getascii:=chr(i)
  199. else
  200. getascii:=chr(i div 256)+chr(i mod 256);
  201. exit;
  202. end;
  203. end;
  204. var
  205. hp : punicodemap;
  206. initialization
  207. mappings:=nil;
  208. finalization
  209. while assigned(mappings) do
  210. begin
  211. hp:=mappings^.next;
  212. if not(mappings^.internalmap) then
  213. begin
  214. freemem(mappings^.map);
  215. dispose(mappings);
  216. end;
  217. mappings:=hp;
  218. end;
  219. end.