widestr.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. This unit contains basic functions for unicode support in the
  4. compiler, this unit is mainly necessary to bootstrap widestring
  5. support ...
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. unit widestr;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. charset,globtype;
  24. type
  25. tcompilerwidechar = word;
  26. tcompilerwidecharptr = ^tcompilerwidechar;
  27. pcompilerwidechar = ^tcompilerwidechar;
  28. tcompilerwidechararray = array of tcompilerwidechar;
  29. tcompilerwidestring = class
  30. data : tcompilerwidechararray;
  31. maxlen,len : SizeInt;
  32. function asconstpunicodechar : PUnicodeChar;
  33. end;
  34. procedure initwidestring(out r : tcompilerwidestring);
  35. procedure donewidestring(var r : tcompilerwidestring);
  36. procedure setlengthwidestring(r : tcompilerwidestring;l : SizeInt);
  37. function getlengthwidestring(r : tcompilerwidestring) : SizeInt;
  38. procedure concatwidestringchar(r : tcompilerwidestring;c : tcompilerwidechar);
  39. procedure concatwidestrings(s1,s2 : tcompilerwidestring);
  40. function comparewidestrings(s1,s2 : tcompilerwidestring) : SizeInt;
  41. procedure copywidestring(s,d : tcompilerwidestring);
  42. function asciichar2unicode(c : char) : tcompilerwidechar;
  43. function unicode2asciichar(c : tcompilerwidechar) : char;
  44. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : tcompilerwidestring;codepagetranslation : boolean = true);
  45. procedure unicode2ascii(r : tcompilerwidestring;p : pchar;cp : tstringencoding);
  46. procedure unicode2ascii(r : tcompilerwidestring;arr:TAnsiCharDynArray;cp : tstringencoding);
  47. function hasnonasciichars(const p: tcompilerwidestring): boolean;
  48. function getcharwidestring(r : tcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  49. function cpavailable(const s: string) : boolean;
  50. function cpavailable(cp: word) : boolean;
  51. procedure changecodepage(
  52. s : pchar; l : SizeInt; scp : tstringencoding;
  53. d : pchar; dcp : tstringencoding
  54. );
  55. function codepagebyname(const s : string) : tstringencoding;
  56. function charlength(p: pchar; len: sizeint): sizeint;
  57. function charlength(const s: string): sizeint;
  58. implementation
  59. uses
  60. { use only small codepage maps, others will be }
  61. { loaded on demand from -FM path }
  62. { cyrillic code pages }
  63. cp1251,cp866,cp8859_5,
  64. { greek code page }
  65. cp1253,
  66. { other code pages }
  67. cp8859_1,cp850,cp437,cp1252,cp646,
  68. cp874, cp856,cp852,cp8859_2,
  69. cp1250,cp1254,cp1255,cp1256,cp1257,cp1258,
  70. globals,cutils;
  71. procedure initwidestring(out r : tcompilerwidestring);
  72. begin
  73. R:=tcompilerwidestring.create;
  74. r.data:=nil;
  75. r.len:=0;
  76. r.maxlen:=0;
  77. end;
  78. procedure donewidestring(var r : tcompilerwidestring);
  79. begin
  80. r.Free;
  81. r:=nil;
  82. end;
  83. function getcharwidestring(r : tcompilerwidestring;l : SizeInt) : tcompilerwidechar;
  84. begin
  85. getcharwidestring:=r.data[l];
  86. end;
  87. function getlengthwidestring(r : tcompilerwidestring) : SizeInt;
  88. begin
  89. getlengthwidestring:=r.len;
  90. end;
  91. procedure growwidestring(r : tcompilerwidestring;l : SizeInt);
  92. begin
  93. if r.maxlen>=l then
  94. exit;
  95. setlength(r.data,l);
  96. r.maxlen:=l;
  97. end;
  98. procedure setlengthwidestring(r : tcompilerwidestring;l : SizeInt);
  99. begin
  100. r.len:=l;
  101. if l>r.maxlen then
  102. growwidestring(r,l);
  103. end;
  104. procedure concatwidestringchar(r : tcompilerwidestring;c : tcompilerwidechar);
  105. begin
  106. if r.len>=r.maxlen then
  107. growwidestring(r,r.len+16);
  108. r.data[r.len]:=c;
  109. inc(r.len);
  110. end;
  111. procedure concatwidestrings(s1,s2 : tcompilerwidestring);
  112. begin
  113. growwidestring(s1,s1.len+s2.len);
  114. if s2.len>0 then
  115. move(s2.data[0],s1.data[s1.len],s2.len*sizeof(tcompilerwidechar));
  116. inc(s1.len,s2.len);
  117. end;
  118. procedure copywidestring(s,d : tcompilerwidestring);
  119. begin
  120. setlengthwidestring(d,s.len);
  121. if s.len>0 then
  122. move(s.data[0],d.data[0],s.len*sizeof(tcompilerwidechar));
  123. end;
  124. function comparewidestrings(s1,s2 : tcompilerwidestring) : SizeInt;
  125. var
  126. maxi,temp : SizeInt;
  127. begin
  128. if pointer(s1)=pointer(s2) then
  129. begin
  130. comparewidestrings:=0;
  131. exit;
  132. end;
  133. maxi:=s1.len;
  134. temp:=s2.len;
  135. if maxi>temp then
  136. maxi:=Temp;
  137. if maxi>0 then
  138. temp:=compareword(s1.data[0],s2.data[0],maxi)
  139. else
  140. temp:=0;
  141. if temp=0 then
  142. temp:=s1.len-s2.len;
  143. comparewidestrings:=temp;
  144. end;
  145. function asciichar2unicode(c : char) : tcompilerwidechar;
  146. var
  147. m : punicodemap;
  148. begin
  149. if (current_settings.sourcecodepage <> CP_UTF8) then
  150. begin
  151. m:=getmap(current_settings.sourcecodepage);
  152. asciichar2unicode:=getunicode(c,m);
  153. end
  154. else
  155. result:=tcompilerwidechar(c);
  156. end;
  157. function unicode2asciichar(c : tcompilerwidechar) : char;
  158. {begin
  159. if word(c)<128 then
  160. unicode2asciichar:=char(word(c))
  161. else
  162. unicode2asciichar:='?';
  163. end;}
  164. begin
  165. Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
  166. end;
  167. procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : tcompilerwidestring;codepagetranslation : boolean = true);
  168. var
  169. source : pchar;
  170. dest : tcompilerwidecharptr;
  171. i : SizeInt;
  172. m : punicodemap;
  173. begin
  174. m:=getmap(cp);
  175. setlengthwidestring(r,l);
  176. source:=p;
  177. dest:=tcompilerwidecharptr(r.data);
  178. if codepagetranslation then
  179. begin
  180. if cp<>CP_UTF8 then
  181. begin
  182. for i:=1 to l do
  183. begin
  184. dest^:=getunicode(source^,m);
  185. inc(dest);
  186. inc(source);
  187. end;
  188. end
  189. else
  190. begin
  191. r.len:=Utf8ToUnicode(punicodechar(r.data),r.maxlen,p,l);
  192. { -1, because utf8tounicode includes room for a terminating 0 in
  193. its result count }
  194. if r.len>0 then
  195. dec(r.len);
  196. end;
  197. end
  198. else
  199. begin
  200. for i:=1 to l do
  201. begin
  202. dest^:=tcompilerwidechar(source^);
  203. inc(dest);
  204. inc(source);
  205. end;
  206. end;
  207. end;
  208. procedure unicode2ascii(r : tcompilerwidestring;arr:TAnsiCharDynArray;cp : tstringencoding);
  209. begin
  210. if (r.len=0) or (length(arr)=0) then
  211. exit;
  212. unicode2ascii(r,Pchar(@arr[0]),cp);
  213. end;
  214. procedure unicode2ascii(r : tcompilerwidestring;p:pchar;cp : tstringencoding);
  215. var
  216. m : punicodemap;
  217. source : tcompilerwidecharptr;
  218. dest : pchar;
  219. i : longint;
  220. begin
  221. { can't implement that here, because the memory size for p() cannot
  222. be changed here, and we may need more bytes than have been allocated }
  223. if cp=CP_UTF8 then
  224. internalerrorproc(2015092701);
  225. if (cp = 0) or (cp=CP_NONE) then
  226. m:=getmap(current_settings.sourcecodepage)
  227. else
  228. m:=getmap(cp);
  229. source:=tcompilerwidecharptr(r.data);
  230. dest:=p;
  231. for i:=1 to r.len do
  232. begin
  233. dest^ := getascii(source^,m)[1];
  234. inc(dest);
  235. inc(source);
  236. end;
  237. end;
  238. function hasnonasciichars(const p: tcompilerwidestring): boolean;
  239. var
  240. source : tcompilerwidecharptr;
  241. i : longint;
  242. begin
  243. source:=tcompilerwidecharptr(p.data);
  244. result:=true;
  245. for i:=1 to p.len do
  246. begin
  247. if word(source^)>=128 then
  248. exit;
  249. inc(source);
  250. end;
  251. result:=false;
  252. end;
  253. function cpavailable(const s: string): boolean;
  254. begin
  255. result:=mappingavailable(lower(s));
  256. if not result then
  257. result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset',lower(s)));
  258. end;
  259. function cpavailable(cp: word): boolean;
  260. begin
  261. result:=mappingavailable(cp);
  262. if not result then
  263. result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset','cp'+tostr(cp)));
  264. end;
  265. procedure changecodepage(
  266. s : pchar; l : SizeInt; scp : tstringencoding;
  267. d : pchar; dcp : tstringencoding
  268. );
  269. var
  270. ms, md : punicodemap;
  271. source : pchar;
  272. dest : pchar;
  273. i : longint;
  274. begin
  275. ms:=getmap(scp);
  276. md:=getmap(dcp);
  277. source:=s;
  278. dest:=d;
  279. for i:=1 to l do
  280. begin
  281. dest^ := getascii(getunicode(source^,ms),md)[1];
  282. inc(dest);
  283. inc(source);
  284. end;
  285. end;
  286. function codepagebyname(const s : string) : tstringencoding;
  287. var
  288. p : punicodemap;
  289. begin
  290. Result:=0;
  291. p:=getmap(lower(s));
  292. if (p<>nil) then
  293. Result:=p^.cp;
  294. end;
  295. function charlength(p: pchar; len: sizeint): sizeint;
  296. var
  297. p2: pchar;
  298. i, chars, codepointlen: sizeint;
  299. begin
  300. if len=0 then
  301. begin
  302. result:=0;
  303. exit;
  304. end;
  305. { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1)
  306. should be equal to the amount of characters in the source string. }
  307. if defaultsystemcodepage=cp_utf8 then
  308. { ChangeCodePage does not work for UTF-8 apparently... :-( }
  309. begin
  310. i:=1;
  311. chars:=0;
  312. while i<=len do
  313. begin
  314. codepointlen:=utf8codepointlen(p,len-i+1,true);
  315. inc(i,codepointlen);
  316. inc(p,codepointlen);
  317. inc(chars);
  318. end;
  319. result:=chars;
  320. end
  321. else if cpavailable(defaultsystemcodepage) then
  322. begin
  323. getmem(p2,succ(len));
  324. fillchar(p2^,succ(len),0);
  325. changecodepage(p,len,defaultsystemcodepage,p2,28591);
  326. result:=strlen(p2);
  327. freemem(p2,succ(len));
  328. end
  329. else
  330. result:=len;
  331. end;
  332. function charlength(const s: string): sizeint;
  333. begin
  334. result:=charlength(@s[1],length(s));
  335. end;
  336. { tcompilerwidestring }
  337. const
  338. cEmptyUnicodeChar : UnicodeChar = #0;
  339. function tcompilerwidestring.asconstpunicodechar: PUnicodeChar;
  340. begin
  341. if length(data)>0 then
  342. result:=@Data[0]
  343. else
  344. result:=@cEmptyUnicodeChar;
  345. end;
  346. end.