widestr.pas 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. This unit contains basic functions for unicode support in the
  4. compiler, this unit is mainly necessary to bootstrap widestring
  5. support ...
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. unit widestr;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. charset,globtype;
  24. type
  25. tcompilerwidechar = word;
  26. tcompilerwidecharptr = ^tcompilerwidechar;
  27. pcompilerwidechar = ^tcompilerwidechar;
  28. pcompilerwidestring = ^_tcompilerwidestring;
  29. _tcompilerwidestring = record
  30. data : pcompilerwidechar;
  31. maxlen,len : SizeInt;
  32. end;
  33. procedure initwidestring(out r : pcompilerwidestring);
  34. procedure donewidestring(var r : pcompilerwidestring);
  35. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  36. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  37. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  38. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  39. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  40. procedure copywidestring(s,d : pcompilerwidestring);
  41. function asciichar2unicode(c : char) : tcompilerwidechar;
  42. function unicode2asciichar(c : tcompilerwidechar) : char;
  43. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  44. procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
  45. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  46. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  47. function cpavailable(const s: string) : boolean;
  48. function cpavailable(cp: word) : boolean;
  49. procedure changecodepage(
  50. s : pchar; l : SizeInt; scp : tstringencoding;
  51. d : pchar; dcp : tstringencoding
  52. );
  53. function codepagebyname(const s : string) : tstringencoding;
  54. function charlength(p: pchar; len: sizeint): sizeint;
  55. function charlength(const s: string): sizeint;
  56. implementation
  57. uses
  58. { use only small codepage maps, others will be }
  59. { loaded on demand from -FM path }
  60. { cyrillic code pages }
  61. cp1251,cp866,cp8859_5,
  62. { greek code page }
  63. cp1253,
  64. { other code pages }
  65. cp8859_1,cp850,cp437,cp1252,cp646,
  66. cp874, cp856,cp852,cp8859_2,
  67. cp1250,cp1254,cp1255,cp1256,cp1257,cp1258,
  68. globals,cutils;
  69. procedure initwidestring(out r : pcompilerwidestring);
  70. begin
  71. new(r);
  72. r^.data:=nil;
  73. r^.len:=0;
  74. r^.maxlen:=0;
  75. end;
  76. procedure donewidestring(var r : pcompilerwidestring);
  77. begin
  78. if assigned(r^.data) then
  79. freemem(r^.data);
  80. dispose(r);
  81. r:=nil;
  82. end;
  83. function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  84. begin
  85. getcharwidestring:=r^.data[l];
  86. end;
  87. function getlengthwidestring(r : pcompilerwidestring) : SizeInt;
  88. begin
  89. getlengthwidestring:=r^.len;
  90. end;
  91. procedure growwidestring(r : pcompilerwidestring;l : SizeInt);
  92. begin
  93. if r^.maxlen>=l then
  94. exit;
  95. if assigned(r^.data) then
  96. reallocmem(r^.data,sizeof(tcompilerwidechar)*l)
  97. else
  98. getmem(r^.data,sizeof(tcompilerwidechar)*l);
  99. r^.maxlen:=l;
  100. end;
  101. procedure setlengthwidestring(r : pcompilerwidestring;l : SizeInt);
  102. begin
  103. r^.len:=l;
  104. if l>r^.maxlen then
  105. growwidestring(r,l);
  106. end;
  107. procedure concatwidestringchar(r : pcompilerwidestring;c : tcompilerwidechar);
  108. begin
  109. if r^.len>=r^.maxlen then
  110. growwidestring(r,r^.len+16);
  111. r^.data[r^.len]:=c;
  112. inc(r^.len);
  113. end;
  114. procedure concatwidestrings(s1,s2 : pcompilerwidestring);
  115. begin
  116. growwidestring(s1,s1^.len+s2^.len);
  117. move(s2^.data^,s1^.data[s1^.len],s2^.len*sizeof(tcompilerwidechar));
  118. inc(s1^.len,s2^.len);
  119. end;
  120. procedure copywidestring(s,d : pcompilerwidestring);
  121. begin
  122. setlengthwidestring(d,s^.len);
  123. move(s^.data^,d^.data^,s^.len*sizeof(tcompilerwidechar));
  124. end;
  125. function comparewidestrings(s1,s2 : pcompilerwidestring) : SizeInt;
  126. var
  127. maxi,temp : SizeInt;
  128. begin
  129. if pointer(s1)=pointer(s2) then
  130. begin
  131. comparewidestrings:=0;
  132. exit;
  133. end;
  134. maxi:=s1^.len;
  135. temp:=s2^.len;
  136. if maxi>temp then
  137. maxi:=Temp;
  138. temp:=compareword(s1^.data^,s2^.data^,maxi);
  139. if temp=0 then
  140. temp:=s1^.len-s2^.len;
  141. comparewidestrings:=temp;
  142. end;
  143. function asciichar2unicode(c : char) : tcompilerwidechar;
  144. var
  145. m : punicodemap;
  146. begin
  147. if (current_settings.sourcecodepage <> CP_UTF8) then
  148. begin
  149. m:=getmap(current_settings.sourcecodepage);
  150. asciichar2unicode:=getunicode(c,m);
  151. end
  152. else
  153. result:=tcompilerwidechar(c);
  154. end;
  155. function unicode2asciichar(c : tcompilerwidechar) : char;
  156. {begin
  157. if word(c)<128 then
  158. unicode2asciichar:=char(word(c))
  159. else
  160. unicode2asciichar:='?';
  161. end;}
  162. begin
  163. Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
  164. end;
  165. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
  166. var
  167. source : pchar;
  168. dest : tcompilerwidecharptr;
  169. i : SizeInt;
  170. m : punicodemap;
  171. begin
  172. m:=getmap(cp);
  173. setlengthwidestring(r,l);
  174. source:=p;
  175. dest:=tcompilerwidecharptr(r^.data);
  176. if codepagetranslation then
  177. begin
  178. if cp<>CP_UTF8 then
  179. begin
  180. for i:=1 to l do
  181. begin
  182. dest^:=getunicode(source^,m);
  183. inc(dest);
  184. inc(source);
  185. end;
  186. end
  187. else
  188. begin
  189. r^.len:=Utf8ToUnicode(punicodechar(r^.data),r^.maxlen,p,l);
  190. { -1, because utf8tounicode includes room for a terminating 0 in
  191. its result count }
  192. if r^.len>0 then
  193. dec(r^.len);
  194. end;
  195. end
  196. else
  197. begin
  198. for i:=1 to l do
  199. begin
  200. dest^:=tcompilerwidechar(source^);
  201. inc(dest);
  202. inc(source);
  203. end;
  204. end;
  205. end;
  206. procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
  207. var
  208. m : punicodemap;
  209. source : tcompilerwidecharptr;
  210. dest : pchar;
  211. i : longint;
  212. begin
  213. { can't implement that here, because the memory size for p() cannot
  214. be changed here, and we may need more bytes than have been allocated }
  215. if cp=CP_UTF8 then
  216. internalerrorproc(2015092701);
  217. if (cp = 0) or (cp=CP_NONE) then
  218. m:=getmap(current_settings.sourcecodepage)
  219. else
  220. m:=getmap(cp);
  221. source:=tcompilerwidecharptr(r^.data);
  222. dest:=p;
  223. for i:=1 to r^.len do
  224. begin
  225. dest^ := getascii(source^,m)[1];
  226. inc(dest);
  227. inc(source);
  228. end;
  229. end;
  230. function hasnonasciichars(const p: pcompilerwidestring): boolean;
  231. var
  232. source : tcompilerwidecharptr;
  233. i : longint;
  234. begin
  235. source:=tcompilerwidecharptr(p^.data);
  236. result:=true;
  237. for i:=1 to p^.len do
  238. begin
  239. if word(source^)>=128 then
  240. exit;
  241. inc(source);
  242. end;
  243. result:=false;
  244. end;
  245. function cpavailable(const s: string): boolean;
  246. begin
  247. result:=mappingavailable(lower(s));
  248. if not result then
  249. result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset',lower(s)));
  250. end;
  251. function cpavailable(cp: word): boolean;
  252. begin
  253. result:=mappingavailable(cp);
  254. if not result then
  255. result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset','cp'+tostr(cp)));
  256. end;
  257. procedure changecodepage(
  258. s : pchar; l : SizeInt; scp : tstringencoding;
  259. d : pchar; dcp : tstringencoding
  260. );
  261. var
  262. ms, md : punicodemap;
  263. source : pchar;
  264. dest : pchar;
  265. i : longint;
  266. begin
  267. ms:=getmap(scp);
  268. md:=getmap(dcp);
  269. source:=s;
  270. dest:=d;
  271. for i:=1 to l do
  272. begin
  273. dest^ := getascii(getunicode(source^,ms),md)[1];
  274. inc(dest);
  275. inc(source);
  276. end;
  277. end;
  278. function codepagebyname(const s : string) : tstringencoding;
  279. var
  280. p : punicodemap;
  281. begin
  282. Result:=0;
  283. p:=getmap(lower(s));
  284. if (p<>nil) then
  285. Result:=p^.cp;
  286. end;
  287. function charlength(p: pchar; len: sizeint): sizeint;
  288. {$IFDEF FPC_HAS_CPSTRING}
  289. var
  290. p2: pchar;
  291. i, chars, codepointlen: sizeint;
  292. {$ENDIF FPC_HAS_CPSTRING}
  293. begin
  294. {$IFDEF FPC_HAS_CPSTRING}
  295. if len=0 then
  296. begin
  297. result:=0;
  298. exit;
  299. end;
  300. { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1)
  301. should be equal to the amount of characters in the source string. }
  302. if defaultsystemcodepage=cp_utf8 then
  303. { ChangeCodePage does not work for UTF-8 apparently... :-( }
  304. begin
  305. i:=1;
  306. chars:=0;
  307. while i<=len do
  308. begin
  309. codepointlen:=utf8codepointlen(p,len-i+1,true);
  310. inc(i,codepointlen);
  311. inc(p,codepointlen);
  312. inc(chars);
  313. end;
  314. result:=chars;
  315. end
  316. else if cpavailable(defaultsystemcodepage) then
  317. begin
  318. getmem(p2,succ(len));
  319. fillchar(p2^,succ(len),0);
  320. changecodepage(p,len,defaultsystemcodepage,p2,28591);
  321. result:=strlen(p2);
  322. freemem(p2,succ(len));
  323. end
  324. else
  325. result:=len;
  326. {$ELSE FPC_HAS_CPSTRING}
  327. result:=len;
  328. {$ENDIF FPC_HAS_CPSTRING}
  329. end;
  330. function charlength(const s: string): sizeint;
  331. begin
  332. result:=charlength(@s[1],length(s));
  333. end;
  334. end.