widestr.pas 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. This unit contains basic functions for unicode support in the
  4. compiler, this unit is mainly necessary to bootstrap widestring
  5. support ...
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. unit widestr;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. {$if FPC_FULLVERSION<20700}ccharset{$else}charset{$endif},globtype;
  24. type
  25. tcompilerwidechar = word;
  26. tcompilerwidecharptr = ^tcompilerwidechar;
  27. pcompilerwidechar = ^tcompilerwidechar;
  28. pcompilerwidestring = ^_tcompilerwidestring;
  29. _tcompilerwidestring = record
  30. data : pcompilerwidechar;
  31. maxlen,len : SizeInt;
  32. end;
  33. procedure initwidestring(out r : pcompilerwidestring);
  34. procedure donewidestring(var r : pcompilerwidestring);
  35. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  36. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  37. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  38. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  39. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  40. procedure copywidestring(s,d : pcompilerwidestring);
  41. function asciichar2unicode(c : char) : tcompilerwidechar;
  42. function unicode2asciichar(c : tcompilerwidechar) : char;
  43. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  44. procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
  45. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  46. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  47. function cpavailable(const s: string) : boolean;
  48. function cpavailable(cp: word) : boolean;
  49. procedure changecodepage(
  50. s : pchar; l : SizeInt; scp : tstringencoding;
  51. d : pchar; dcp : tstringencoding
  52. );
  53. function codepagebyname(const s : string) : tstringencoding;
  54. function charlength(p: pchar; len: sizeint): sizeint;
  55. function charlength(const s: string): sizeint;
  56. implementation
  57. uses
  58. {$if FPC_FULLVERSION>20700}
  59. { use only small codepage maps, others will be }
  60. { loaded on demand from -FM path }
  61. { cyrillic code pages }
  62. cp1251,cp866,cp8859_5,
  63. { greek code page }
  64. cp1253,
  65. { other code pages }
  66. cp8859_1,cp850,cp437,cp1252,cp646,
  67. cp874, cp856,cp852,cp8859_2,
  68. cp1250,cp1254,cp1255,cp1256,cp1257,cp1258,
  69. {$endif}
  70. globals,cutils;
  71. procedure initwidestring(out r : pcompilerwidestring);
  72. begin
  73. new(r);
  74. r^.data:=nil;
  75. r^.len:=0;
  76. r^.maxlen:=0;
  77. end;
  78. procedure donewidestring(var r : pcompilerwidestring);
  79. begin
  80. if assigned(r^.data) then
  81. freemem(r^.data);
  82. dispose(r);
  83. r:=nil;
  84. end;
  85. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  86. begin
  87. getcharwidestring:=r^.data[l];
  88. end;
  89. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  90. begin
  91. getlengthwidestring:=r^.len;
  92. end;
  93. procedure growwidestring(r : pcompilerwidestring;l : SizeInt);
  94. begin
  95. if r^.maxlen>=l then
  96. exit;
  97. if assigned(r^.data) then
  98. reallocmem(r^.data,sizeof(tcompilerwidechar)*l)
  99. else
  100. getmem(r^.data,sizeof(tcompilerwidechar)*l);
  101. r^.maxlen:=l;
  102. end;
  103. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  104. begin
  105. r^.len:=l;
  106. if l>r^.maxlen then
  107. growwidestring(r,l);
  108. end;
  109. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  110. begin
  111. if r^.len>=r^.maxlen then
  112. growwidestring(r,r^.len+16);
  113. r^.data[r^.len]:=c;
  114. inc(r^.len);
  115. end;
  116. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  117. begin
  118. growwidestring(s1,s1^.len+s2^.len);
  119. move(s2^.data^,s1^.data[s1^.len],s2^.len*sizeof(tcompilerwidechar));
  120. inc(s1^.len,s2^.len);
  121. end;
  122. procedure copywidestring(s,d : pcompilerwidestring);
  123. begin
  124. setlengthwidestring(d,s^.len);
  125. move(s^.data^,d^.data^,s^.len*sizeof(tcompilerwidechar));
  126. end;
  127. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  128. var
  129. maxi,temp : SizeInt;
  130. begin
  131. if pointer(s1)=pointer(s2) then
  132. begin
  133. comparewidestrings:=0;
  134. exit;
  135. end;
  136. maxi:=s1^.len;
  137. temp:=s2^.len;
  138. if maxi>temp then
  139. maxi:=Temp;
  140. temp:=compareword(s1^.data^,s2^.data^,maxi);
  141. if temp=0 then
  142. temp:=s1^.len-s2^.len;
  143. comparewidestrings:=temp;
  144. end;
  145. function asciichar2unicode(c : char) : tcompilerwidechar;
  146. var
  147. m : punicodemap;
  148. begin
  149. if (current_settings.sourcecodepage <> CP_UTF8) then
  150. begin
  151. m:=getmap(current_settings.sourcecodepage);
  152. asciichar2unicode:=getunicode(c,m);
  153. end
  154. else
  155. result:=tcompilerwidechar(c);
  156. end;
  157. function unicode2asciichar(c : tcompilerwidechar) : char;
  158. {begin
  159. if word(c)<128 then
  160. unicode2asciichar:=char(word(c))
  161. else
  162. unicode2asciichar:='?';
  163. end;}
  164. begin
  165. Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
  166. end;
  167. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  168. var
  169. source : pchar;
  170. dest : tcompilerwidecharptr;
  171. i : SizeInt;
  172. m : punicodemap;
  173. begin
  174. m:=getmap(cp);
  175. setlengthwidestring(r,l);
  176. source:=p;
  177. dest:=tcompilerwidecharptr(r^.data);
  178. if (current_settings.sourcecodepage <> CP_UTF8) and
  179. codepagetranslation then
  180. begin
  181. for i:=1 to l do
  182. begin
  183. dest^:=getunicode(source^,m);
  184. inc(dest);
  185. inc(source);
  186. end;
  187. end
  188. else
  189. begin
  190. for i:=1 to l do
  191. begin
  192. dest^:=tcompilerwidechar(source^);
  193. inc(dest);
  194. inc(source);
  195. end;
  196. end;
  197. end;
  198. procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
  199. var
  200. m : punicodemap;
  201. source : tcompilerwidecharptr;
  202. dest : pchar;
  203. i : longint;
  204. begin
  205. if (cp = 0) or (cp=CP_NONE) then
  206. m:=getmap(current_settings.sourcecodepage)
  207. else
  208. m:=getmap(cp);
  209. // !!!! MBCS
  210. source:=tcompilerwidecharptr(r^.data);
  211. dest:=p;
  212. for i:=1 to r^.len do
  213. begin
  214. dest^ := getascii(source^,m)[1];
  215. inc(dest);
  216. inc(source);
  217. end;
  218. end;
  219. (*
  220. var
  221. source : tcompilerwidecharptr;
  222. dest : pchar;
  223. i : longint;
  224. begin
  225. { This routine must work the same as the
  226. the routine in the RTL to have the same compile time (for constant strings)
  227. and runtime conversion (for variables) }
  228. source:=tcompilerwidecharptr(r^.data);
  229. dest:=p;
  230. for i:=1 to r^.len do
  231. begin
  232. if word(source^)<128 then
  233. dest^:=char(word(source^))
  234. else
  235. dest^:='?';
  236. inc(dest);
  237. inc(source);
  238. end;
  239. end;
  240. *)
  241. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  242. var
  243. source : tcompilerwidecharptr;
  244. i : longint;
  245. begin
  246. source:=tcompilerwidecharptr(p^.data);
  247. result:=true;
  248. for i:=1 to p^.len do
  249. begin
  250. if word(source^)>=128 then
  251. exit;
  252. inc(source);
  253. end;
  254. result:=false;
  255. end;
  256. function cpavailable(const s: string): boolean;
  257. begin
  258. result:=mappingavailable(lower(s));
  259. {$if FPC_FULLVERSION>20700}
  260. if not result then
  261. result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset',lower(s)));
  262. {$ifend}
  263. end;
  264. function cpavailable(cp: word): boolean;
  265. begin
  266. result:=mappingavailable(cp);
  267. {$if FPC_FULLVERSION>20700}
  268. if not result then
  269. result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset','cp'+tostr(cp)));
  270. {$ifend}
  271. end;
  272. procedure changecodepage(
  273. s : pchar; l : SizeInt; scp : tstringencoding;
  274. d : pchar; dcp : tstringencoding
  275. );
  276. var
  277. ms, md : punicodemap;
  278. source : pchar;
  279. dest : pchar;
  280. i : longint;
  281. begin
  282. ms:=getmap(scp);
  283. md:=getmap(dcp);
  284. source:=s;
  285. dest:=d;
  286. for i:=1 to l do
  287. begin
  288. dest^ := getascii(getunicode(source^,ms),md)[1];
  289. inc(dest);
  290. inc(source);
  291. end;
  292. end;
  293. function codepagebyname(const s : string) : tstringencoding;
  294. var
  295. p : punicodemap;
  296. begin
  297. Result:=0;
  298. p:=getmap(s);
  299. if (p<>nil) then
  300. Result:=p^.cp;
  301. end;
  302. function charlength(p: pchar; len: sizeint): sizeint;
  303. var
  304. p2: pchar;
  305. i, chars, codepointlen: sizeint;
  306. begin
  307. {$IFDEF FPC_HAS_CPSTRING}
  308. if len=0 then
  309. begin
  310. result:=0;
  311. exit;
  312. end;
  313. { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1)
  314. should be equal to the amount of characters in the source string. }
  315. if defaultsystemcodepage=cp_utf8 then
  316. { ChangeCodePage does not work for UTF-8 apparently... :-( }
  317. begin
  318. i:=1;
  319. chars:=0;
  320. while i<=len do
  321. begin
  322. codepointlen:=utf8codepointlen(p,len-i+1,true);
  323. inc(i,codepointlen);
  324. inc(p,codepointlen);
  325. inc(chars);
  326. end;
  327. result:=chars;
  328. end
  329. else if cpavailable(defaultsystemcodepage) then
  330. begin
  331. getmem(p2,succ(len));
  332. fillchar(p2^,succ(len),0);
  333. changecodepage(p,len,defaultsystemcodepage,p2,28591);
  334. result:=strlen(p2);
  335. freemem(p2,succ(len));
  336. end
  337. else
  338. result:=len;
  339. {$ELSE FPC_HAS_CPSTRING}
  340. result:=len;
  341. {$ENDIF FPC_HAS_CPSTRING}
  342. end;
  343. function charlength(const s: string): sizeint;
  344. begin
  345. result:=charlength(@s[1],length(s));
  346. end;
  347. end.