charset.pp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000 by Florian Klaempfl
  5. member of the Free Pascal development team.
  6. This unit implements several classes for charset conversions
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$mode objfpc}
  14. unit charset;
  15. interface
  16. type
  17. tunicodechar = word;
  18. tunicodestring = ^tunicodechar;
  19. tcsconvert = class
  20. // !!!!!!1constructor create;
  21. end;
  22. tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
  23. umf_unused);
  24. punicodecharmapping = ^tunicodecharmapping;
  25. tunicodecharmapping = record
  26. unicode : tunicodechar;
  27. flag : tunicodecharmappingflag;
  28. reserved : byte;
  29. end;
  30. punicodemap = ^tunicodemap;
  31. tunicodemap = record
  32. cpname : shortstring;
  33. map : punicodecharmapping;
  34. lastchar : longint;
  35. next : punicodemap;
  36. internalmap : boolean;
  37. end;
  38. tcp2unicode = class(tcsconvert)
  39. end;
  40. function loadunicodemapping(const cpname,f : string) : punicodemap;
  41. procedure registermapping(p : punicodemap);
  42. implementation
  43. var
  44. mappings : punicodemap;
  45. function loadunicodemapping(const cpname,f : string) : punicodemap;
  46. var
  47. data : punicodecharmapping;
  48. datasize : longint;
  49. t : text;
  50. s,hs : string;
  51. scanpos,charpos,unicodevalue : longint;
  52. code : word;
  53. flag : tunicodecharmappingflag;
  54. p : punicodemap;
  55. lastchar : longint;
  56. begin
  57. lastchar:=-1;
  58. loadunicodemapping:=nil;
  59. datasize:=256;
  60. getmem(data,sizeof(tunicodecharmapping)*datasize);
  61. assign(t,f);
  62. {$I-}
  63. reset(t);
  64. {$I+}
  65. if ioresult<>0 then
  66. begin
  67. freemem(data,sizeof(tunicodecharmapping)*datasize);
  68. exit;
  69. end;
  70. readln(t,s);
  71. while not(eof(t)) do
  72. begin
  73. if (s[1]='0') and (s[2]='x') then
  74. begin
  75. flag:=umf_unused;
  76. scanpos:=3;
  77. hs:='$';
  78. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  79. begin
  80. hs:=hs+s[scanpos];
  81. inc(scanpos);
  82. end;
  83. val(hs,charpos,code);
  84. if code<>0 then
  85. begin
  86. freemem(data,sizeof(tunicodecharmapping)*datasize);
  87. close(t);
  88. exit;
  89. end;
  90. while not(s[scanpos] in ['0','#']) do
  91. inc(scanpos);
  92. if s[scanpos]='#' then
  93. begin
  94. { special char }
  95. unicodevalue:=$ffff;
  96. hs:=copy(s,scanpos,length(s)-scanpos+1);
  97. if hs='#DBCS LEAD BYTE' then
  98. flag:=umf_leadbyte;
  99. end
  100. else
  101. begin
  102. { C hex prefix }
  103. inc(scanpos,2);
  104. hs:='$';
  105. while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
  106. begin
  107. hs:=hs+s[scanpos];
  108. inc(scanpos);
  109. end;
  110. val(hs,unicodevalue,code);
  111. if code<>0 then
  112. begin
  113. freemem(data,sizeof(tunicodecharmapping)*datasize);
  114. close(t);
  115. exit;
  116. end;
  117. if charpos>datasize then
  118. begin
  119. { allocate 1024 bytes more because }
  120. { if we need more than 256 entries it's }
  121. { probably a mbcs with a lot of }
  122. { entries }
  123. datasize:=charpos+1024;
  124. reallocmem(data,sizeof(tunicodecharmapping)*datasize);
  125. end;
  126. flag:=umf_noinfo;
  127. end;
  128. data[charpos].flag:=flag;
  129. data[charpos].unicode:=unicodevalue;
  130. if charpos>lastchar then
  131. lastchar:=charpos;
  132. end;
  133. readln(t,s);
  134. end;
  135. close(t);
  136. new(p);
  137. p^.lastchar:=lastchar;
  138. p^.cpname:=cpname;
  139. p^.internalmap:=false;
  140. p^.next:=nil;
  141. p^.map:=data;
  142. loadunicodemapping:=p;
  143. end;
  144. procedure registermapping(p : punicodemap);
  145. begin
  146. p^.next:=mappings;
  147. mappings:=p;
  148. end;
  149. var
  150. hp : punicodemap;
  151. initialization
  152. mappings:=nil;
  153. finalization
  154. while assigned(mappings) do
  155. begin
  156. hp:=mappings^.next;
  157. if not(mappings^.internalmap) then
  158. begin
  159. freemem(mappings^.map);
  160. dispose(mappings);
  161. end;
  162. mappings:=hp;
  163. end;
  164. end.
  165. {
  166. $Log$
  167. Revision 1.1 2000-08-17 07:29:39 florian
  168. + initial revision
  169. }