charset.pp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2000 by Florian Klaempfl
  4. member of the Free Pascal development team.
  5. This unit implements several classes for charset conversions
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. unit charset;
  14. interface
  15. type
  16. tunicodechar = word;
  17. tunicodestring = ^tunicodechar;
  18. tcsconvert = class
  19. // !!!!!!1constructor create;
  20. end;
  21. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  22. umf_unused);
  23. punicodecharmapping = ^tunicodecharmapping;
  24. tunicodecharmapping = record
  25. unicode : tunicodechar;
  26. flag : tunicodecharmappingflag;
  27. reserved : byte;
  28. end;
  29. punicodemap = ^tunicodemap;
  30. tunicodemap = record
  31. cpname : string[20];
  32. cp : word;
  33. map : punicodecharmapping;
  34. lastchar : longint;
  35. next : punicodemap;
  36. internalmap : boolean;
  37. end;
  38. tcp2unicode = class(tcsconvert)
  39. end;
  40. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  41. procedure registermapping(p : punicodemap);
  42. function getmap(const s : string) : punicodemap;
  43. function getmap(cp : word) : punicodemap;
  44. function mappingavailable(const s : string) : boolean;
  45. function mappingavailable(cp :word) : boolean;
  46. function getunicode(c : char;p : punicodemap) : tunicodechar;
  47. function getunicode(
  48. AAnsiStr : pansichar;
  49. AAnsiLen : SizeInt;
  50. AMap : punicodemap;
  51. ADest : tunicodestring
  52. ) : SizeInt;
  53. function getascii(c : tunicodechar;p : punicodemap) : string;
  54. function getascii(c : tunicodechar;p : punicodemap; ABuffer : PAnsiChar; ABufferLen : SizeInt) : SizeInt;
  55. implementation
  56. const
  57. UNKNOW_CHAR_A = ansichar(63);
  58. UNKNOW_CHAR_W = tunicodechar(63);
  59. var
  60. mappings : punicodemap;
  61. function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
  62. var
  63. data : punicodecharmapping;
  64. datasize : longint;
  65. t : text;
  66. s,hs : string;
  67. scanpos,charpos,unicodevalue : longint;
  68. code : word;
  69. flag : tunicodecharmappingflag;
  70. p : punicodemap;
  71. lastchar : longint;
  72. begin
  73. lastchar:=-1;
  74. loadunicodemapping:=nil;
  75. datasize:=256;
  76. getmem(data,sizeof(tunicodecharmapping)*datasize);
  77. assign(t,f);
  78. {$I-}
  79. reset(t);
  80. {$I+}
  81. if ioresult<>0 then
  82. begin
  83. freemem(data,sizeof(tunicodecharmapping)*datasize);
  84. exit;
  85. end;
  86. while not(eof(t)) do
  87. begin
  88. readln(t,s);
  89. if (s[1]='0') and (s[2]='x') then
  90. begin
  91. flag:=umf_unused;
  92. scanpos:=3;
  93. hs:='$';
  94. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  95. begin
  96. hs:=hs+s[scanpos];
  97. inc(scanpos);
  98. end;
  99. val(hs,charpos,code);
  100. if code<>0 then
  101. begin
  102. freemem(data,sizeof(tunicodecharmapping)*datasize);
  103. close(t);
  104. exit;
  105. end;
  106. while not(s[scanpos] in ['0','#']) do
  107. inc(scanpos);
  108. if s[scanpos]='#' then
  109. begin
  110. { special char }
  111. unicodevalue:=$ffff;
  112. hs:=copy(s,scanpos,length(s)-scanpos+1);
  113. if hs='#DBCS LEAD BYTE' then
  114. flag:=umf_leadbyte;
  115. end
  116. else
  117. begin
  118. { C hex prefix }
  119. inc(scanpos,2);
  120. hs:='$';
  121. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  122. begin
  123. hs:=hs+s[scanpos];
  124. inc(scanpos);
  125. end;
  126. val(hs,unicodevalue,code);
  127. if code<>0 then
  128. begin
  129. freemem(data,sizeof(tunicodecharmapping)*datasize);
  130. close(t);
  131. exit;
  132. end;
  133. if charpos>datasize then
  134. begin
  135. { allocate 1024 bytes more because }
  136. { if we need more than 256 entries it's }
  137. { probably a mbcs with a lot of }
  138. { entries }
  139. datasize:=charpos+1024;
  140. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  141. end;
  142. flag:=umf_noinfo;
  143. end;
  144. data[charpos].flag:=flag;
  145. data[charpos].unicode:=unicodevalue;
  146. if charpos>lastchar then
  147. lastchar:=charpos;
  148. end;
  149. end;
  150. close(t);
  151. new(p);
  152. p^.lastchar:=lastchar;
  153. p^.cpname:=cpname;
  154. p^.cp:=cp;
  155. p^.internalmap:=false;
  156. p^.next:=nil;
  157. p^.map:=data;
  158. loadunicodemapping:=p;
  159. end;
  160. procedure registermapping(p : punicodemap);
  161. begin
  162. p^.next:=mappings;
  163. mappings:=p;
  164. end;
  165. function getmap(const s : string) : punicodemap;
  166. var
  167. hp : punicodemap;
  168. const
  169. mapcache : string = '';
  170. mapcachep : punicodemap = nil;
  171. begin
  172. if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
  173. begin
  174. getmap:=mapcachep;
  175. exit;
  176. end;
  177. hp:=mappings;
  178. while assigned(hp) do
  179. begin
  180. if hp^.cpname=s then
  181. begin
  182. getmap:=hp;
  183. mapcache:=s;
  184. mapcachep:=hp;
  185. exit;
  186. end;
  187. hp:=hp^.next;
  188. end;
  189. getmap:=nil;
  190. end;////////
  191. function getmap(cp : word) : punicodemap;
  192. var
  193. hp : punicodemap;
  194. const
  195. mapcache : word = 0;
  196. mapcachep : punicodemap = nil;
  197. begin
  198. if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
  199. begin
  200. getmap:=mapcachep;
  201. exit;
  202. end;
  203. hp:=mappings;
  204. while assigned(hp) do
  205. begin
  206. if hp^.cp=cp then
  207. begin
  208. getmap:=hp;
  209. mapcache:=cp;
  210. mapcachep:=hp;
  211. exit;
  212. end;
  213. hp:=hp^.next;
  214. end;
  215. getmap:=nil;
  216. end;
  217. function mappingavailable(const s : string) : boolean;
  218. begin
  219. mappingavailable:=getmap(s)<>nil;
  220. end;
  221. function mappingavailable(cp : word) : boolean;
  222. begin
  223. mappingavailable:=getmap(cp)<>nil;
  224. end;
  225. function getunicode(c : char;p : punicodemap) : tunicodechar;
  226. begin
  227. if ord(c)<=p^.lastchar then
  228. getunicode:=p^.map[ord(c)].unicode
  229. else
  230. getunicode:=0;
  231. end;
  232. function getunicode(
  233. AAnsiStr : pansichar;
  234. AAnsiLen : SizeInt;
  235. AMap : punicodemap;
  236. ADest : tunicodestring
  237. ) : SizeInt;
  238. var
  239. i, c, k, destLen : longint;
  240. ps : pansichar;
  241. pd : ^tunicodechar;
  242. begin
  243. if (AAnsiStr=nil) or (AAnsiLen<=0) then
  244. exit(0);
  245. ps:=AAnsiStr;
  246. if (ADest=nil) then
  247. begin
  248. c:=AAnsiLen-1;
  249. destLen:=0;
  250. i:=0;
  251. while (i<=c) do
  252. begin
  253. if (ord(ps^)<=AMap^.lastchar) then
  254. begin
  255. if (AMap^.map[ord(ps^)].flag=umf_leadbyte) and (i<c) then
  256. Inc(ps);
  257. end;
  258. i:=i+1;
  259. Inc(ps);
  260. destLen:=destLen+1;
  261. end;
  262. exit(destLen);
  263. end;
  264. pd:=ADest;
  265. c:=AAnsiLen-1;
  266. i:=0;
  267. while (i<=c) do
  268. begin
  269. if (ord(ps^)<=AMap^.lastchar) then
  270. begin
  271. if (AMap^.map[ord(ps^)].flag=umf_leadbyte) then
  272. begin
  273. if (i<c) then
  274. begin
  275. k:=(Ord(ps^)*256);
  276. Inc(ps);
  277. k:=k+Ord(ps^);
  278. if (k<=AMap^.lastchar) then
  279. pd^:=AMap^.map[k].unicode
  280. else
  281. pd^:=UNKNOW_CHAR_W;
  282. end
  283. else
  284. pd^:=UNKNOW_CHAR_W;
  285. end
  286. else
  287. pd^:=AMap^.map[ord(ps^)].unicode
  288. end
  289. else
  290. pd^:=UNKNOW_CHAR_W;
  291. i:=i+1;
  292. Inc(ps);
  293. Inc(pd);
  294. end;
  295. result:=((PtrUInt(pd)-PtrUInt(ADest)) div SizeOf(tunicodechar));
  296. end;
  297. function getascii(c : tunicodechar;p : punicodemap) : string;
  298. var
  299. i : longint;
  300. begin
  301. { at least map to '?' }
  302. getascii:=#63;
  303. for i:=0 to p^.lastchar do
  304. if p^.map[i].unicode=c then
  305. begin
  306. if i<256 then
  307. getascii:=chr(i)
  308. else
  309. getascii:=chr(i div 256)+chr(i mod 256);
  310. exit;
  311. end;
  312. end;
  313. function getascii(c : tunicodechar;p : punicodemap; ABuffer : PAnsiChar; ABufferLen : SizeInt) : SizeInt;
  314. var
  315. i : longint;
  316. begin
  317. if (ABuffer<>nil) and (ABufferLen<=0) then
  318. begin
  319. Result:=-1;
  320. exit;
  321. end;
  322. for i:=0 to p^.lastchar do
  323. if p^.map[i].unicode=c then
  324. begin
  325. if (ABuffer=nil) then
  326. begin
  327. if i<256 then
  328. Result:=1
  329. else
  330. Result:=2;
  331. exit;
  332. end;
  333. if i<256 then
  334. begin
  335. Result:=1;
  336. ABuffer^:=chr(i);
  337. end
  338. else
  339. begin
  340. if (ABufferLen<2) then
  341. begin
  342. Result:=-1;
  343. exit;
  344. end;
  345. ABuffer^:=chr(i div 256);
  346. Inc(ABuffer);
  347. ABuffer^:=chr(i mod 256);
  348. end;
  349. exit;
  350. end;
  351. { at least map to '?' }
  352. Result := 1;
  353. if (ABuffer<>nil) then
  354. ABuffer^:=#63;
  355. end;
  356. var
  357. hp : punicodemap;
  358. initialization
  359. mappings:=nil;
  360. finalization
  361. while assigned(mappings) do
  362. begin
  363. hp:=mappings^.next;
  364. if not(mappings^.internalmap) then
  365. begin
  366. freemem(mappings^.map);
  367. dispose(mappings);
  368. end;
  369. mappings:=hp;
  370. end;
  371. end.