widestr.pas 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. This unit contains basic functions for unicode support in the
  4. compiler, this unit is mainly necessary to bootstrap widestring
  5. support ...
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. unit widestr;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. {$if FPC_FULLVERSION<20700}ccharset{$else}charset{$endif},globtype;
  24. type
  25. tcompilerwidechar = word;
  26. tcompilerwidecharptr = ^tcompilerwidechar;
  27. pcompilerwidechar = ^tcompilerwidechar;
  28. pcompilerwidestring = ^_tcompilerwidestring;
  29. _tcompilerwidestring = record
  30. data : pcompilerwidechar;
  31. maxlen,len : SizeInt;
  32. end;
  33. procedure initwidestring(out r : pcompilerwidestring);
  34. procedure donewidestring(var r : pcompilerwidestring);
  35. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  36. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  37. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  38. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  39. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  40. procedure copywidestring(s,d : pcompilerwidestring);
  41. function asciichar2unicode(c : char) : tcompilerwidechar;
  42. function unicode2asciichar(c : tcompilerwidechar) : char;
  43. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  44. procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
  45. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  46. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  47. function cpavailable(const s : string) : boolean;
  48. function cpavailable(cp : word) : boolean;
  49. procedure changecodepage(
  50. s : pchar; l : SizeInt; scp : tstringencoding;
  51. d : pchar; dcp : tstringencoding
  52. );
  53. function codepagebyname(const s : string) : tstringencoding;
  54. implementation
  55. uses
  56. cp8859_1,cp850,cp437,cp1252,cp646,
  57. { cyrillic code pages }
  58. cp1251,cp866,cp8859_5,
  59. globals,cutils;
  60. procedure initwidestring(out r : pcompilerwidestring);
  61. begin
  62. new(r);
  63. r^.data:=nil;
  64. r^.len:=0;
  65. r^.maxlen:=0;
  66. end;
  67. procedure donewidestring(var r : pcompilerwidestring);
  68. begin
  69. if assigned(r^.data) then
  70. freemem(r^.data);
  71. dispose(r);
  72. r:=nil;
  73. end;
  74. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  75. begin
  76. getcharwidestring:=r^.data[l];
  77. end;
  78. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  79. begin
  80. getlengthwidestring:=r^.len;
  81. end;
  82. procedure growwidestring(r : pcompilerwidestring;l : SizeInt);
  83. begin
  84. if r^.maxlen>=l then
  85. exit;
  86. if assigned(r^.data) then
  87. reallocmem(r^.data,sizeof(tcompilerwidechar)*l)
  88. else
  89. getmem(r^.data,sizeof(tcompilerwidechar)*l);
  90. r^.maxlen:=l;
  91. end;
  92. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  93. begin
  94. r^.len:=l;
  95. if l>r^.maxlen then
  96. growwidestring(r,l);
  97. end;
  98. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  99. begin
  100. if r^.len>=r^.maxlen then
  101. growwidestring(r,r^.len+16);
  102. r^.data[r^.len]:=c;
  103. inc(r^.len);
  104. end;
  105. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  106. begin
  107. growwidestring(s1,s1^.len+s2^.len);
  108. move(s2^.data^,s1^.data[s1^.len],s2^.len*sizeof(tcompilerwidechar));
  109. inc(s1^.len,s2^.len);
  110. end;
  111. procedure copywidestring(s,d : pcompilerwidestring);
  112. begin
  113. setlengthwidestring(d,s^.len);
  114. move(s^.data^,d^.data^,s^.len*sizeof(tcompilerwidechar));
  115. end;
  116. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  117. var
  118. maxi,temp : SizeInt;
  119. begin
  120. if pointer(s1)=pointer(s2) then
  121. begin
  122. comparewidestrings:=0;
  123. exit;
  124. end;
  125. maxi:=s1^.len;
  126. temp:=s2^.len;
  127. if maxi>temp then
  128. maxi:=Temp;
  129. temp:=compareword(s1^.data^,s2^.data^,maxi);
  130. if temp=0 then
  131. temp:=s1^.len-s2^.len;
  132. comparewidestrings:=temp;
  133. end;
  134. function asciichar2unicode(c : char) : tcompilerwidechar;
  135. var
  136. m : punicodemap;
  137. begin
  138. if (current_settings.sourcecodepage <> CP_UTF8) then
  139. begin
  140. m:=getmap(current_settings.sourcecodepage);
  141. asciichar2unicode:=getunicode(c,m);
  142. end
  143. else
  144. result:=tcompilerwidechar(c);
  145. end;
  146. function unicode2asciichar(c : tcompilerwidechar) : char;
  147. {begin
  148. if word(c)<128 then
  149. unicode2asciichar:=char(word(c))
  150. else
  151. unicode2asciichar:='?';
  152. end;}
  153. begin
  154. Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
  155. end;
  156. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  157. var
  158. source : pchar;
  159. dest : tcompilerwidecharptr;
  160. i : SizeInt;
  161. m : punicodemap;
  162. begin
  163. m:=getmap(cp);
  164. setlengthwidestring(r,l);
  165. source:=p;
  166. dest:=tcompilerwidecharptr(r^.data);
  167. if (current_settings.sourcecodepage <> CP_UTF8) and
  168. codepagetranslation then
  169. begin
  170. for i:=1 to l do
  171. begin
  172. dest^:=getunicode(source^,m);
  173. inc(dest);
  174. inc(source);
  175. end;
  176. end
  177. else
  178. begin
  179. for i:=1 to l do
  180. begin
  181. dest^:=tcompilerwidechar(source^);
  182. inc(dest);
  183. inc(source);
  184. end;
  185. end;
  186. end;
  187. procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
  188. var
  189. m : punicodemap;
  190. source : tcompilerwidecharptr;
  191. dest : pchar;
  192. i : longint;
  193. begin
  194. if (cp = 0) or (cp=CP_NONE) then
  195. m:=getmap(current_settings.sourcecodepage)
  196. else
  197. m:=getmap(cp);
  198. // !!!! MBCS
  199. source:=tcompilerwidecharptr(r^.data);
  200. dest:=p;
  201. for i:=1 to r^.len do
  202. begin
  203. dest^ := getascii(source^,m)[1];
  204. inc(dest);
  205. inc(source);
  206. end;
  207. end;
  208. (*
  209. var
  210. source : tcompilerwidecharptr;
  211. dest : pchar;
  212. i : longint;
  213. begin
  214. { This routine must work the same as the
  215. the routine in the RTL to have the same compile time (for constant strings)
  216. and runtime conversion (for variables) }
  217. source:=tcompilerwidecharptr(r^.data);
  218. dest:=p;
  219. for i:=1 to r^.len do
  220. begin
  221. if word(source^)<128 then
  222. dest^:=char(word(source^))
  223. else
  224. dest^:='?';
  225. inc(dest);
  226. inc(source);
  227. end;
  228. end;
  229. *)
  230. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  231. var
  232. source : tcompilerwidecharptr;
  233. i : longint;
  234. begin
  235. source:=tcompilerwidecharptr(p^.data);
  236. result:=true;
  237. for i:=1 to p^.len do
  238. begin
  239. if word(source^)>=128 then
  240. exit;
  241. inc(source);
  242. end;
  243. result:=false;
  244. end;
  245. function cpavailable(const s : string) : boolean;
  246. begin
  247. cpavailable:=mappingavailable(lower(s));
  248. end;
  249. function cpavailable(cp : word) : boolean;
  250. begin
  251. cpavailable:=mappingavailable(cp);
  252. end;
  253. procedure changecodepage(
  254. s : pchar; l : SizeInt; scp : tstringencoding;
  255. d : pchar; dcp : tstringencoding
  256. );
  257. var
  258. ms, md : punicodemap;
  259. source : pchar;
  260. dest : pchar;
  261. i : longint;
  262. begin
  263. ms:=getmap(scp);
  264. md:=getmap(dcp);
  265. source:=s;
  266. dest:=d;
  267. for i:=1 to l do
  268. begin
  269. dest^ := getascii(getunicode(source^,ms),md)[1];
  270. inc(dest);
  271. inc(source);
  272. end;
  273. end;
  274. function codepagebyname(const s : string) : tstringencoding;
  275. var
  276. p : punicodemap;
  277. begin
  278. Result:=0;
  279. p:=getmap(s);
  280. if (p<>nil) then
  281. Result:=p^.cp;
  282. end;
  283. end.