widestr.pas 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. This unit contains basic functions for unicode support in the
  4. compiler, this unit is mainly necessary to bootstrap widestring
  5. support ...
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. unit widestr;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. {$if FPC_FULLVERSION<20700}ccharset{$else}charset{$endif},globtype;
  24. type
  25. tcompilerwidechar = word;
  26. tcompilerwidecharptr = ^tcompilerwidechar;
  27. pcompilerwidechar = ^tcompilerwidechar;
  28. pcompilerwidestring = ^_tcompilerwidestring;
  29. _tcompilerwidestring = record
  30. data : pcompilerwidechar;
  31. maxlen,len : SizeInt;
  32. end;
  33. procedure initwidestring(out r : pcompilerwidestring);
  34. procedure donewidestring(var r : pcompilerwidestring);
  35. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  36. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  37. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  38. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  39. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  40. procedure copywidestring(s,d : pcompilerwidestring);
  41. function asciichar2unicode(c : char) : tcompilerwidechar;
  42. function unicode2asciichar(c : tcompilerwidechar) : char;
  43. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  44. procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
  45. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  46. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  47. function cpavailable(const s : string) : boolean;
  48. function cpavailable(cp : word) : boolean;
  49. procedure changecodepage(
  50. s : pchar; l : SizeInt; scp : tstringencoding;
  51. d : pchar; dcp : tstringencoding
  52. );
  53. function codepagebyname(const s : string) : tstringencoding;
  54. implementation
  55. uses
  56. cp8859_1,cp850,cp437,cp1252,cp646,
  57. { cyrillic code pages }
  58. cp1251,cp866,cp8859_5,
  59. { greek code page }
  60. cp1253,
  61. globals,cutils;
  62. procedure initwidestring(out r : pcompilerwidestring);
  63. begin
  64. new(r);
  65. r^.data:=nil;
  66. r^.len:=0;
  67. r^.maxlen:=0;
  68. end;
  69. procedure donewidestring(var r : pcompilerwidestring);
  70. begin
  71. if assigned(r^.data) then
  72. freemem(r^.data);
  73. dispose(r);
  74. r:=nil;
  75. end;
  76. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  77. begin
  78. getcharwidestring:=r^.data[l];
  79. end;
  80. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  81. begin
  82. getlengthwidestring:=r^.len;
  83. end;
  84. procedure growwidestring(r : pcompilerwidestring;l : SizeInt);
  85. begin
  86. if r^.maxlen>=l then
  87. exit;
  88. if assigned(r^.data) then
  89. reallocmem(r^.data,sizeof(tcompilerwidechar)*l)
  90. else
  91. getmem(r^.data,sizeof(tcompilerwidechar)*l);
  92. r^.maxlen:=l;
  93. end;
  94. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  95. begin
  96. r^.len:=l;
  97. if l>r^.maxlen then
  98. growwidestring(r,l);
  99. end;
  100. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  101. begin
  102. if r^.len>=r^.maxlen then
  103. growwidestring(r,r^.len+16);
  104. r^.data[r^.len]:=c;
  105. inc(r^.len);
  106. end;
  107. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  108. begin
  109. growwidestring(s1,s1^.len+s2^.len);
  110. move(s2^.data^,s1^.data[s1^.len],s2^.len*sizeof(tcompilerwidechar));
  111. inc(s1^.len,s2^.len);
  112. end;
  113. procedure copywidestring(s,d : pcompilerwidestring);
  114. begin
  115. setlengthwidestring(d,s^.len);
  116. move(s^.data^,d^.data^,s^.len*sizeof(tcompilerwidechar));
  117. end;
  118. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  119. var
  120. maxi,temp : SizeInt;
  121. begin
  122. if pointer(s1)=pointer(s2) then
  123. begin
  124. comparewidestrings:=0;
  125. exit;
  126. end;
  127. maxi:=s1^.len;
  128. temp:=s2^.len;
  129. if maxi>temp then
  130. maxi:=Temp;
  131. temp:=compareword(s1^.data^,s2^.data^,maxi);
  132. if temp=0 then
  133. temp:=s1^.len-s2^.len;
  134. comparewidestrings:=temp;
  135. end;
  136. function asciichar2unicode(c : char) : tcompilerwidechar;
  137. var
  138. m : punicodemap;
  139. begin
  140. if (current_settings.sourcecodepage <> CP_UTF8) then
  141. begin
  142. m:=getmap(current_settings.sourcecodepage);
  143. asciichar2unicode:=getunicode(c,m);
  144. end
  145. else
  146. result:=tcompilerwidechar(c);
  147. end;
  148. function unicode2asciichar(c : tcompilerwidechar) : char;
  149. {begin
  150. if word(c)<128 then
  151. unicode2asciichar:=char(word(c))
  152. else
  153. unicode2asciichar:='?';
  154. end;}
  155. begin
  156. Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
  157. end;
  158. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  159. var
  160. source : pchar;
  161. dest : tcompilerwidecharptr;
  162. i : SizeInt;
  163. m : punicodemap;
  164. begin
  165. m:=getmap(cp);
  166. setlengthwidestring(r,l);
  167. source:=p;
  168. dest:=tcompilerwidecharptr(r^.data);
  169. if (current_settings.sourcecodepage <> CP_UTF8) and
  170. codepagetranslation then
  171. begin
  172. for i:=1 to l do
  173. begin
  174. dest^:=getunicode(source^,m);
  175. inc(dest);
  176. inc(source);
  177. end;
  178. end
  179. else
  180. begin
  181. for i:=1 to l do
  182. begin
  183. dest^:=tcompilerwidechar(source^);
  184. inc(dest);
  185. inc(source);
  186. end;
  187. end;
  188. end;
  189. procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
  190. var
  191. m : punicodemap;
  192. source : tcompilerwidecharptr;
  193. dest : pchar;
  194. i : longint;
  195. begin
  196. if (cp = 0) or (cp=CP_NONE) then
  197. m:=getmap(current_settings.sourcecodepage)
  198. else
  199. m:=getmap(cp);
  200. // !!!! MBCS
  201. source:=tcompilerwidecharptr(r^.data);
  202. dest:=p;
  203. for i:=1 to r^.len do
  204. begin
  205. dest^ := getascii(source^,m)[1];
  206. inc(dest);
  207. inc(source);
  208. end;
  209. end;
  210. (*
  211. var
  212. source : tcompilerwidecharptr;
  213. dest : pchar;
  214. i : longint;
  215. begin
  216. { This routine must work the same as the
  217. the routine in the RTL to have the same compile time (for constant strings)
  218. and runtime conversion (for variables) }
  219. source:=tcompilerwidecharptr(r^.data);
  220. dest:=p;
  221. for i:=1 to r^.len do
  222. begin
  223. if word(source^)<128 then
  224. dest^:=char(word(source^))
  225. else
  226. dest^:='?';
  227. inc(dest);
  228. inc(source);
  229. end;
  230. end;
  231. *)
  232. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  233. var
  234. source : tcompilerwidecharptr;
  235. i : longint;
  236. begin
  237. source:=tcompilerwidecharptr(p^.data);
  238. result:=true;
  239. for i:=1 to p^.len do
  240. begin
  241. if word(source^)>=128 then
  242. exit;
  243. inc(source);
  244. end;
  245. result:=false;
  246. end;
  247. function cpavailable(const s : string) : boolean;
  248. begin
  249. cpavailable:=mappingavailable(lower(s));
  250. end;
  251. function cpavailable(cp : word) : boolean;
  252. begin
  253. cpavailable:=mappingavailable(cp);
  254. end;
  255. procedure changecodepage(
  256. s : pchar; l : SizeInt; scp : tstringencoding;
  257. d : pchar; dcp : tstringencoding
  258. );
  259. var
  260. ms, md : punicodemap;
  261. source : pchar;
  262. dest : pchar;
  263. i : longint;
  264. begin
  265. ms:=getmap(scp);
  266. md:=getmap(dcp);
  267. source:=s;
  268. dest:=d;
  269. for i:=1 to l do
  270. begin
  271. dest^ := getascii(getunicode(source^,ms),md)[1];
  272. inc(dest);
  273. inc(source);
  274. end;
  275. end;
  276. function codepagebyname(const s : string) : tstringencoding;
  277. var
  278. p : punicodemap;
  279. begin
  280. Result:=0;
  281. p:=getmap(s);
  282. if (p<>nil) then
  283. Result:=p^.cp;
  284. end;
  285. end.