wstrings.inc 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for WideStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
{
  This file contains the implementation of the WideString type,
  and all things that are needed for it.
  WideString is defined as a 'silent' pwidechar :
  a pwidechar that points to :

  @-4  : DWord for size; size=number of bytes, not the number of chars. Divide or multiply
         with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
         Windows COM BSTR.
  @    : String + Terminating #0;

  No reference count is stored in front of the string (see TWideRec below).
  Pwidechar(Widestring) is a valid typecast.
  So WS[i] is converted to the address @WS+i-1.

  Constants are recognised by their address (see IsWideStringConstant),
  meaning that they are never disposed of.
}
  27. Type
  28. PWideRec = ^TWideRec;
  29. TWideRec = Packed Record
  30. Len : DWord;
  31. First : WideChar;
  32. end;
  33. Const
  34. WideRecLen = SizeOf(TWideRec);
  35. WideFirstOff = SizeOf(TWideRec)-sizeof(WideChar);
{
  Default WideChar <-> Char conversion: WideChar -> Char only keeps
  code points below 256, all others are translated to '?';
  Char -> WideChar widens every byte unchanged.
  These routines can be overwritten for the Current Locale
}
  41. procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
  42. var
  43. i : SizeInt;
  44. begin
  45. setlength(dest,len);
  46. for i:=1 to len do
  47. begin
  48. if word(source^)<256 then
  49. dest[i]:=char(word(source^))
  50. else
  51. dest[i]:='?';
  52. inc(source);
  53. end;
  54. end;
  55. procedure DefaultAnsi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
  56. var
  57. i : SizeInt;
  58. begin
  59. setlength(dest,len);
  60. for i:=1 to len do
  61. begin
  62. dest[i]:=widechar(byte(source^));
  63. inc(source);
  64. end;
  65. end;
  66. (*
  67. Procedure UniqueWideString(Var S : WideString); [Public,Alias : 'FPC_WIDESTR_UNIQUE'];
  68. {
  69. Make sure reference count of S is 1,
  70. using copy-on-write semantics.
  71. }
  72. begin
  73. end;
  74. *)
  75. {****************************************************************************
  76. Internal functions, not in interface.
  77. ****************************************************************************}
  78. procedure WideStringError;
  79. begin
  80. HandleErrorFrame(204,get_frame);
  81. end;
  82. {$ifdef WideStrDebug}
  83. Procedure DumpWideRec(S : Pointer);
  84. begin
  85. If S=Nil then
  86. Writeln ('String is nil')
  87. Else
  88. Begin
  89. With PWideRec(S-WideFirstOff)^ do
  90. begin
  91. Write ('(Len:',len);
  92. Writeln (' Ref: ',ref,')');
  93. end;
  94. end;
  95. end;
  96. {$endif}
  97. Function NewWideString(Len : SizeInt) : Pointer;
  98. {
  99. Allocate a new WideString on the heap.
  100. initialize it to zero length and reference count 1.
  101. }
  102. Var
  103. P : Pointer;
  104. begin
  105. {$ifdef MSWINDOWS}
  106. if winwidestringalloc then
  107. begin
  108. P:=SysAllocStringLen(nil,Len);
  109. if P=nil then
  110. WideStringError;
  111. end
  112. else
  113. {$endif MSWINDOWS}
  114. begin
  115. GetMem(P,Len*sizeof(WideChar)+WideRecLen);
  116. If P<>Nil then
  117. begin
  118. PWideRec(P)^.Len:=Len*2; { Initial length }
  119. PWideRec(P)^.First:=#0; { Terminating #0 }
  120. inc(p,WideFirstOff); { Points to string now }
  121. end
  122. else
  123. WideStringError;
  124. end;
  125. NewWideString:=P;
  126. end;
  127. Procedure DisposeWideString(Var S : Pointer);
  128. {
  129. Deallocates a WideString From the heap.
  130. }
  131. begin
  132. If S=Nil then
  133. exit;
  134. {$ifndef MSWINDOWS}
  135. Dec (S,WideFirstOff);
  136. Freemem(S);
  137. {$else MSWINDOWS}
  138. if winwidestringalloc then
  139. SysFreeString(S)
  140. else
  141. begin
  142. Dec (S,WideFirstOff);
  143. Freemem(S);
  144. end;
  145. {$endif MSWINDOWS}
  146. S:=Nil;
  147. end;
  148. var
  149. __data_start: byte; external name '__data_start__';
  150. __data_end: byte; external name '__data_end__';
  151. function IsWideStringConstant(S: pointer): boolean;{$ifdef SYSTEMINLINE}inline;{$endif}
  152. {
  153. Returns True if widestring is constant (located in .data section);
  154. }
  155. begin
  156. Result:=(S>=@__data_start) and (S<@__data_end);
  157. end;
  158. Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
  159. {
  160. Decreases the ReferenceCount of a non constant widestring;
  161. If the reference count is zero, deallocate the string;
  162. }
  163. Type
  164. pSizeInt = ^SizeInt;
  165. Begin
  166. { Zero string }
  167. if S=Nil then
  168. exit;
  169. if not IsWideStringConstant(S) then
  170. DisposeWideString(S);
  171. end;
  172. { alias for internal use }
  173. Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
  174. Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
  175. var
  176. p : pointer;
  177. Begin
  178. If S=Nil then
  179. exit;
  180. p:=NewWidestring(length(WideString(S)));
  181. move(s^,p^,(length(WideString(s))+1)*sizeof(widechar)); // double #0 too
  182. s:=p;
  183. end;
  184. { alias for internal use }
  185. Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
  186. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  187. function fpc_WideStr_To_ShortStr (high_of_res: SizeInt;const S2 : WideString): shortstring;[Public, alias: 'FPC_WIDESTR_TO_SHORTSTR']; compilerproc;
  188. {
  189. Converts a WideString to a ShortString;
  190. }
  191. Var
  192. Size : SizeInt;
  193. temp : ansistring;
  194. begin
  195. result:='';
  196. Size:=Length(S2);
  197. if Size>0 then
  198. begin
  199. If Size>high_of_res then
  200. Size:=high_of_res;
  201. widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),temp,Size);
  202. result:=temp;
  203. end;
  204. end;
  205. {$else FPC_STRTOSHORTSTRINGPROC}
  206. procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
  207. {
  208. Converts a WideString to a ShortString;
  209. }
  210. Var
  211. Size : SizeInt;
  212. temp : ansistring;
  213. begin
  214. res:='';
  215. Size:=Length(S2);
  216. if Size>0 then
  217. begin
  218. If Size>high(res) then
  219. Size:=high(res);
  220. widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),temp,Size);
  221. res:=temp;
  222. end;
  223. end;
  224. {$endif FPC_STRTOSHORTSTRINGPROC}
  225. Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
  226. {
  227. Converts a ShortString to a WideString;
  228. }
  229. Var
  230. Size : SizeInt;
  231. begin
  232. result:='';
  233. Size:=Length(S2);
  234. if Size>0 then
  235. begin
  236. widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),result,Size);
  237. { Terminating Zero }
  238. PWideChar(Pointer(fpc_ShortStr_To_WideStr)+Size*sizeof(WideChar))^:=#0;
  239. end;
  240. end;
  241. Function fpc_WideStr_To_AnsiStr (const S2 : WideString): AnsiString; compilerproc;
  242. {
  243. Converts a WideString to an AnsiString
  244. }
  245. Var
  246. Size : SizeInt;
  247. begin
  248. result:='';
  249. Size:=Length(S2);
  250. if Size>0 then
  251. widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,Size);
  252. end;
  253. Function fpc_AnsiStr_To_WideStr (Const S2 : AnsiString): WideString; compilerproc;
  254. {
  255. Converts an AnsiString to a WideString;
  256. }
  257. Var
  258. Size : SizeInt;
  259. begin
  260. result:='';
  261. Size:=Length(S2);
  262. if Size>0 then
  263. widestringmanager.Ansi2WideMoveProc(PChar(S2),result,Size);
  264. end;
  265. Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
  266. var
  267. Size : SizeInt;
  268. begin
  269. result:='';
  270. if p=nil then
  271. exit;
  272. Size := IndexWord(p^, -1, 0);
  273. Setlength(result,Size);
  274. if Size>0 then
  275. begin
  276. Move(p^,PWideChar(Pointer(result))^,Size*sizeof(WideChar));
  277. { Terminating Zero }
  278. PWideChar(Pointer(result)+Size*sizeof(WideChar))^:=#0;
  279. end;
  280. end;
  281. { checked against the ansistring routine, 2001-05-27 (FK) }
  282. Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
  283. {
  284. Assigns S2 to S1 (S1:=S2), taking in account reference counts.
  285. }
  286. begin
  287. if S1=S2 then exit;
  288. if S2<>nil then
  289. begin
  290. if IsWideStringConstant(S1) then
  291. begin
  292. S1:=NewWidestring(length(WideString(S2)));
  293. move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar));
  294. end
  295. else
  296. {$ifdef MSWINDOWS}
  297. if winwidestringalloc then
  298. begin
  299. if SysReAllocStringLen(S1, S2, Length(WideString(S2))) = 0 then
  300. WideStringError;
  301. end
  302. else
  303. {$endif MSWINDOWS}
  304. begin
  305. SetLength(WideString(S1),length(WideString(S2)));
  306. move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar));
  307. end;
  308. end
  309. else
  310. begin
  311. { Free S1 }
  312. fpc_widestr_decr_ref (S1);
  313. S1:=nil;
  314. end;
  315. end;
  316. { alias for internal use }
  317. Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
  318. {$ifndef STR_CONCAT_PROCS}
  319. function fpc_WideStr_Concat (const S1,S2 : WideString): WideString; compilerproc;
  320. Var
  321. Size,Location : SizeInt;
  322. pc : pwidechar;
  323. begin
  324. { only assign if s1 or s2 is empty }
  325. if (S1='') then
  326. begin
  327. result:=s2;
  328. exit;
  329. end;
  330. if (S2='') then
  331. begin
  332. result:=s1;
  333. exit;
  334. end;
  335. Location:=Length(S1);
  336. Size:=length(S2);
  337. SetLength(result,Size+Location);
  338. pc:=pwidechar(result);
  339. Move(S1[1],pc^,Location*sizeof(WideChar));
  340. inc(pc,location);
  341. Move(S2[1],pc^,(Size+1)*sizeof(WideChar));
  342. end;
  343. function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring; compilerproc;
  344. Var
  345. i : Longint;
  346. p : pointer;
  347. pc : pwidechar;
  348. Size,NewSize : SizeInt;
  349. begin
  350. { First calculate size of the result so we can do
  351. a single call to SetLength() }
  352. NewSize:=0;
  353. for i:=low(sarr) to high(sarr) do
  354. inc(Newsize,length(sarr[i]));
  355. SetLength(result,NewSize);
  356. pc:=pwidechar(result);
  357. for i:=low(sarr) to high(sarr) do
  358. begin
  359. p:=pointer(sarr[i]);
  360. if assigned(p) then
  361. begin
  362. Size:=length(widestring(p));
  363. Move(pwidechar(p)^,pc^,(Size+1)*sizeof(WideChar));
  364. inc(pc,size);
  365. end;
  366. end;
  367. end;
  368. {$else STR_CONCAT_PROCS}
  369. procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc;
  370. Var
  371. Size,Location : SizeInt;
  372. same : boolean;
  373. begin
  374. { only assign if s1 or s2 is empty }
  375. if (S1='') then
  376. begin
  377. DestS:=s2;
  378. exit;
  379. end;
  380. if (S2='') then
  381. begin
  382. DestS:=s1;
  383. exit;
  384. end;
  385. Location:=Length(S1);
  386. Size:=length(S2);
  387. { Use Pointer() typecasts to prevent extra conversion code }
  388. if Pointer(DestS)=Pointer(S1) then
  389. begin
  390. same:=Pointer(S1)=Pointer(S2);
  391. SetLength(DestS,Size+Location);
  392. if same then
  393. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size)*sizeof(WideChar))
  394. else
  395. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
  396. end
  397. else if Pointer(DestS)=Pointer(S2) then
  398. begin
  399. SetLength(DestS,Size+Location);
  400. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
  401. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(WideChar));
  402. end
  403. else
  404. begin
  405. DestS:='';
  406. SetLength(DestS,Size+Location);
  407. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(WideChar));
  408. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
  409. end;
  410. end;
  411. procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
  412. Var
  413. i : Longint;
  414. p,pc : pointer;
  415. Size,NewLen : SizeInt;
  416. DestTmp : Widestring;
  417. begin
  418. if high(sarr)=0 then
  419. begin
  420. DestS:='';
  421. exit;
  422. end;
  423. { First calculate size of the result so we can do
  424. a single call to SetLength() }
  425. NewLen:=0;
  426. for i:=low(sarr) to high(sarr) do
  427. inc(NewLen,length(sarr[i]));
  428. SetLength(DestTmp,NewLen);
  429. pc:=pwidechar(DestTmp);
  430. for i:=low(sarr) to high(sarr) do
  431. begin
  432. p:=pointer(sarr[i]);
  433. if assigned(p) then
  434. begin
  435. Size:=length(widestring(p));
  436. Move(p^,pc^,(Size+1)*sizeof(WideChar));
  437. inc(pc,size*sizeof(WideChar));
  438. end;
  439. end;
  440. DestS:=DestTmp;
  441. end;
  442. {$endif STR_CONCAT_PROCS}
  443. Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
  444. {
  445. Converts a Char to a WideString;
  446. }
  447. begin
  448. Setlength(fpc_Char_To_WideStr,1);
  449. fpc_Char_To_WideStr[1]:=c;
  450. { Terminating Zero }
  451. PWideChar(Pointer(fpc_Char_To_WideStr)+sizeof(WideChar))^:=#0;
  452. end;
  453. Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  454. {
  455. Converts a WideChar to a WideString;
  456. }
  457. begin
  458. Setlength (fpc_WChar_To_WideStr,1);
  459. fpc_WChar_To_WideStr[1]:= c;
  460. end;
  461. Function fpc_WChar_To_AnsiStr(const c : WideChar): AnsiString; compilerproc;
  462. {
  463. Converts a WideChar to a AnsiString;
  464. }
  465. begin
  466. widestringmanager.Wide2AnsiMoveProc(@c, fpc_WChar_To_AnsiStr, 1);
  467. end;
  468. Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  469. {
  470. Converts a WideChar to a WideString;
  471. }
  472. begin
  473. Setlength (fpc_UChar_To_WideStr,1);
  474. fpc_UChar_To_WideStr[1]:= c;
  475. end;
  476. Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
  477. Var
  478. L : SizeInt;
  479. begin
  480. if (not assigned(p)) or (p[0]=#0) Then
  481. begin
  482. fpc_pchar_to_widestr := '';
  483. exit;
  484. end;
  485. l:=IndexChar(p^,-1,#0);
  486. widestringmanager.Ansi2WideMoveProc(P,fpc_PChar_To_WideStr,l);
  487. end;
  488. Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
  489. var
  490. i : SizeInt;
  491. begin
  492. if (zerobased) then
  493. begin
  494. if (arr[0]=#0) Then
  495. begin
  496. fpc_chararray_to_widestr := '';
  497. exit;
  498. end;
  499. i:=IndexChar(arr,high(arr)+1,#0);
  500. if i = -1 then
  501. i := high(arr)+1;
  502. end
  503. else
  504. i := high(arr)+1;
  505. SetLength(fpc_CharArray_To_WideStr,i);
  506. widestringmanager.Ansi2WideMoveProc (pchar(@arr),fpc_CharArray_To_WideStr,i);
  507. end;
  508. {$ifndef FPC_STRTOCHARARRAYPROC}
  509. { inside the compiler, the resulttype is modified to that of the actual }
  510. { chararray we're converting to (JM) }
  511. function fpc_widestr_to_chararray(arraysize: SizeInt; const src: WideString): fpc_big_chararray;[public,alias: 'FPC_WIDESTR_TO_CHARARRAY']; compilerproc;
  512. var
  513. len: SizeInt;
  514. temp: ansistring;
  515. begin
  516. len := length(src);
  517. { make sure we don't dereference src if it can be nil (JM) }
  518. if len > 0 then
  519. widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,len);
  520. len := length(temp);
  521. if len > arraysize then
  522. len := arraysize;
  523. {$r-}
  524. move(temp[1],fpc_widestr_to_chararray[0],len);
  525. fillchar(fpc_widestr_to_chararray[len],arraysize-len,0);
  526. {$ifdef RangeCheckWasOn}
  527. {$r+}
  528. {$endif}
  529. end;
  530. { inside the compiler, the resulttype is modified to that of the actual }
  531. { widechararray we're converting to (JM) }
  532. function fpc_widestr_to_widechararray(arraysize: SizeInt; const src: WideString): fpc_big_widechararray;[public,alias: 'FPC_WIDESTR_TO_WIDECHARARRAY']; compilerproc;
  533. var
  534. len: SizeInt;
  535. begin
  536. len := length(src);
  537. if len > arraysize then
  538. len := arraysize;
  539. {$r-}
  540. { make sure we don't try to access element 1 of the ansistring if it's nil }
  541. if len > 0 then
  542. move(src[1],fpc_widestr_to_widechararray[0],len*SizeOf(WideChar));
  543. fillchar(fpc_widestr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  544. {$ifdef RangeCheckWasOn}
  545. {$r+}
  546. {$endif}
  547. end;
  548. { inside the compiler, the resulttype is modified to that of the actual }
  549. { chararray we're converting to (JM) }
  550. function fpc_ansistr_to_widechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_widechararray;[public,alias: 'FPC_ANSISTR_TO_WIDECHARARRAY']; compilerproc;
  551. var
  552. len: SizeInt;
  553. temp: widestring;
  554. begin
  555. len := length(src);
  556. { make sure we don't dereference src if it can be nil (JM) }
  557. if len > 0 then
  558. widestringmanager.ansi2widemoveproc(pchar(@src[1]),temp,len);
  559. len := length(temp);
  560. if len > arraysize then
  561. len := arraysize;
  562. {$r-}
  563. move(temp[1],fpc_ansistr_to_widechararray[0],len*sizeof(widechar));
  564. fillchar(fpc_ansistr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  565. {$ifdef RangeCheckWasOn}
  566. {$r+}
  567. {$endif}
  568. end;
  569. function fpc_shortstr_to_widechararray(arraysize: SizeInt; const src: ShortString): fpc_big_widechararray;[public,alias: 'FPC_SHORTSTR_TO_WIDECHARARRAY']; compilerproc;
  570. var
  571. len: longint;
  572. temp : widestring;
  573. begin
  574. len := length(src);
  575. { make sure we don't access char 1 if length is 0 (JM) }
  576. if len > 0 then
  577. widestringmanager.ansi2widemoveproc(pchar(@src[1]),temp,len);
  578. len := length(temp);
  579. if len > arraysize then
  580. len := arraysize;
  581. {$r-}
  582. move(temp[1],fpc_shortstr_to_widechararray[0],len*sizeof(widechar));
  583. fillchar(fpc_shortstr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  584. {$ifdef RangeCheckWasOn}
  585. {$r+}
  586. {$endif}
  587. end;
  588. {$else ndef FPC_STRTOCHARARRAYPROC}
  589. procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc;
  590. var
  591. len: SizeInt;
  592. temp: ansistring;
  593. begin
  594. len := length(src);
  595. { make sure we don't dereference src if it can be nil (JM) }
  596. if len > 0 then
  597. widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,len);
  598. len := length(temp);
  599. if len > length(res) then
  600. len := length(res);
  601. {$r-}
  602. move(temp[1],res[0],len);
  603. fillchar(res[len],length(res)-len,0);
  604. {$ifdef RangeCheckWasOn}
  605. {$r+}
  606. {$endif}
  607. end;
  608. procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc;
  609. var
  610. len: SizeInt;
  611. begin
  612. len := length(src);
  613. if len > length(res) then
  614. len := length(res);
  615. {$r-}
  616. { make sure we don't try to access element 1 of the ansistring if it's nil }
  617. if len > 0 then
  618. move(src[1],res[0],len*SizeOf(WideChar));
  619. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  620. {$ifdef RangeCheckWasOn}
  621. {$r+}
  622. {$endif}
  623. end;
  624. {$endif ndef FPC_STRTOCHARARRAYPROC}
  625. Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
  626. {
  627. Compares 2 WideStrings;
  628. The result is
  629. <0 if S1<S2
  630. 0 if S1=S2
  631. >0 if S1>S2
  632. }
  633. Var
  634. MaxI,Temp : SizeInt;
  635. begin
  636. if pointer(S1)=pointer(S2) then
  637. begin
  638. fpc_WideStr_Compare:=0;
  639. exit;
  640. end;
  641. Maxi:=Length(S1);
  642. temp:=Length(S2);
  643. If MaxI>Temp then
  644. MaxI:=Temp;
  645. Temp:=CompareWord(S1[1],S2[1],MaxI);
  646. if temp=0 then
  647. temp:=Length(S1)-Length(S2);
  648. fpc_WideStr_Compare:=Temp;
  649. end;
  650. Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
  651. {
  652. Compares 2 WideStrings for equality only;
  653. The result is
  654. 0 if S1=S2
  655. <>0 if S1<>S2
  656. }
  657. Var
  658. MaxI : SizeInt;
  659. begin
  660. if pointer(S1)=pointer(S2) then
  661. exit(0);
  662. Maxi:=Length(S1);
  663. If MaxI<>Length(S2) then
  664. exit(-1)
  665. else
  666. exit(CompareWord(S1[1],S2[1],MaxI));
  667. end;
  668. Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc;
  669. begin
  670. if p=nil then
  671. HandleErrorFrame(201,get_frame);
  672. end;
  673. Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  674. begin
  675. if (index>len div 2) or (Index<1) then
  676. HandleErrorFrame(201,get_frame);
  677. end;
  678. Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
  679. {
  680. Sets The length of string S to L.
  681. Makes sure S is unique, and contains enough room.
  682. }
  683. Var
  684. Temp : Pointer;
  685. movelen: SizeInt;
  686. begin
  687. if (l>0) then
  688. begin
  689. if Pointer(S)=nil then
  690. begin
  691. { Need a complete new string...}
  692. Pointer(s):=NewWideString(l);
  693. end
  694. { windows doesn't support reallocing widestrings, this code
  695. is anyways subject to be removed because widestrings shouldn't be
  696. ref. counted anymore (FK) }
  697. else
  698. if
  699. {$ifdef MSWINDOWS}
  700. not winwidestringalloc and
  701. {$endif MSWINDOWS}
  702. not IsWideStringConstant(pointer(S))
  703. then
  704. begin
  705. Dec(Pointer(S),WideFirstOff);
  706. if SizeUInt(L*sizeof(WideChar)+WideRecLen)>MemSize(Pointer(S)) then
  707. reallocmem(pointer(S), L*sizeof(WideChar)+WideRecLen);
  708. Inc(Pointer(S), WideFirstOff);
  709. end
  710. else
  711. begin
  712. { Reallocation is needed... }
  713. Temp:=Pointer(NewWideString(L));
  714. if Length(S)>0 then
  715. begin
  716. if l < succ(length(s)) then
  717. movelen := l
  718. { also move terminating null }
  719. else
  720. movelen := succ(length(s));
  721. Move(Pointer(S)^,Temp^,movelen * Sizeof(WideChar));
  722. end;
  723. fpc_widestr_decr_ref(Pointer(S));
  724. Pointer(S):=Temp;
  725. end;
  726. { Force nil termination in case it gets shorter }
  727. PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
  728. {$ifdef MSWINDOWS}
  729. if not winwidestringalloc then
  730. {$endif MSWINDOWS}
  731. PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar);
  732. end
  733. else
  734. begin
  735. { Length=0 }
  736. if Pointer(S)<>nil then
  737. fpc_widestr_decr_ref (Pointer(S));
  738. Pointer(S):=Nil;
  739. end;
  740. end;
  741. {*****************************************************************************
  742. Public functions, In interface.
  743. *****************************************************************************}
  744. Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc;
  745. begin
  746. pointer(result) := pointer(s);
  747. end;
  748. Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc;
  749. var
  750. ResultAddress : Pointer;
  751. begin
  752. ResultAddress:=Nil;
  753. dec(index);
  754. if Index < 0 then
  755. Index := 0;
  756. { Check Size. Accounts for Zero-length S, the double check is needed because
  757. Size can be maxint and will get <0 when adding index }
  758. if (Size>Length(S)) or
  759. (Index+Size>Length(S)) then
  760. Size:=Length(S)-Index;
  761. If Size>0 then
  762. begin
  763. If Index<0 Then
  764. Index:=0;
  765. ResultAddress:=Pointer(NewWideString (Size));
  766. if ResultAddress<>Nil then
  767. begin
  768. Move (PWideChar(S)[Index],ResultAddress^,Size*sizeof(WideChar));
  769. PWideRec(ResultAddress-WideFirstOff)^.Len:=Size*sizeof(WideChar);
  770. PWideChar(ResultAddress+Size*sizeof(WideChar))^:=#0;
  771. end;
  772. end;
  773. fpc_widestr_decr_ref(Pointer(fpc_widestr_copy));
  774. Pointer(fpc_widestr_Copy):=ResultAddress;
  775. end;
  776. Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt;
  777. var
  778. i,MaxLen : SizeInt;
  779. pc : pwidechar;
  780. begin
  781. Pos:=0;
  782. if Length(SubStr)>0 then
  783. begin
  784. MaxLen:=Length(source)-Length(SubStr);
  785. i:=0;
  786. pc:=@source[1];
  787. while (i<=MaxLen) do
  788. begin
  789. inc(i);
  790. if (SubStr[1]=pc^) and
  791. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  792. begin
  793. Pos:=i;
  794. exit;
  795. end;
  796. inc(pc);
  797. end;
  798. end;
  799. end;
  800. { Faster version for a widechar alone }
  801. Function Pos (c : WideChar; Const s : WideString) : SizeInt;
  802. var
  803. i: SizeInt;
  804. pc : pwidechar;
  805. begin
  806. pc:=@s[1];
  807. for i:=1 to length(s) do
  808. begin
  809. if pc^=c then
  810. begin
  811. pos:=i;
  812. exit;
  813. end;
  814. inc(pc);
  815. end;
  816. pos:=0;
  817. end;
  { Searches a wide character in an AnsiString by converting the string to a
    WideString and delegating to the WideString overload. }
  Function Pos (c : WideChar; Const s : AnsiString) : SizeInt;
  begin
    Pos:=Pos(c,WideString(s));
  end;
  { Searches an AnsiString pattern in a WideString: the pattern is converted
    to a WideString and the WideString overload does the work. }
  Function Pos (c : AnsiString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(WideString(c),s);
  end;
  { Searches a ShortString pattern in a WideString: the pattern is converted
    to a WideString and the WideString overload does the work. }
  Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(WideString(c),s);
  end;
  { Searches a WideString pattern in an AnsiString: the searched string is
    converted to a WideString and the WideString overload does the work. }
  Function Pos (c : WideString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(c,WideString(s));
  end;
  { Single-character search for a narrow char in a WideString.
    This overload has to exist: pos(c: char; const s: shortstring) is also
    declared, and being an exact match on the first argument it would
    otherwise always be selected, also with $h+ (JM).
    Returns the 1-based position of the first match, or 0 if there is none. }
  Function Pos (c : Char; Const s : WideString) : SizeInt;
  var
    Idx,Total : SizeInt;
    Needle : widechar;
    Cursor : pwidechar;
  begin
    Pos:=0;
    { widen the character once, outside of the loop }
    Needle:=c;
    Cursor:=@s[1];
    Total:=length(s);
    Idx:=1;
    while Idx<=Total do
      begin
        if Cursor^=Needle then
          begin
            Pos:=Idx;
            exit;
          end;
        inc(Cursor);
        inc(Idx);
      end;
  end;
  { Removes up to Size characters from S, starting at the 1-based position
    Index.

    Nothing happens when S is empty, Index is outside 1..Length(S) or Size is
    not positive. A Size that reaches beyond the end of S is clamped so that
    everything from Index onwards is removed. }
  Procedure Delete (Var S : WideString; Index,Size: SizeInt);
  Var
    LS : SizeInt;
  begin
    LS:=Length(S);
    if (LS=0) or (Index<=0) or (Index>LS) or (Size<=0) then
      exit;
    UniqueString(S);
    { Compare against LS-Index instead of computing Size+Index: the sum
      overflows for Size=MaxInt, which used to skip the clamping and ended
      up passing a negative length to SetLength (clearing the whole string). }
    if Size>LS-Index then
      Size:=LS-Index+1;
    if Size<=LS-Index then
      begin
        Dec(Index);
        { shift the tail down, including the terminating #0 }
        Move(PWideChar(S)[Index+Size],PWideChar(S)[Index],(LS-Index-Size+1)*sizeof(WideChar));
      end;
    SetLength(S,LS-Size);
  end;
  { Inserts Source into S in front of the character at the 1-based position
    Index. Index is clamped to the range 1..Length(S)+1, so an index beyond
    the end appends. An empty Source leaves S untouched. }
  Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt);
  var
    Merged : WideString;
    SrcLen,DstLen,TailLen : SizeInt;
  begin
    SrcLen:=Length(Source);
    if SrcLen=0 then
      exit;
    DstLen:=Length(S);
    { clamp, then switch to a zero-based offset }
    if Index<=0 then
      Index:=1;
    if Index>DstLen then
      Index:=DstLen+1;
    Dec(Index);
    Pointer(Merged):=NewWideString(SrcLen+DstLen);
    SetLength(Merged,SrcLen+DstLen);
    { head of S }
    if Index>0 then
      Move(PWideChar(S)^,PWideChar(Merged)^,Index*sizeof(WideChar));
    { inserted text }
    Move(PWideChar(Source)^,PWideChar(Merged)[Index],SrcLen*sizeof(WideChar));
    { tail of S }
    TailLen:=DstLen-Index;
    if TailLen>0 then
      Move(PWideChar(S)[Index],PWideChar(Merged)[SrcLen+Index],TailLen*sizeof(WideChar));
    S:=Merged;
  end;
  { Returns the value produced for s by the upper-casing routine of the
    installed widestring manager. }
  function UpCase(const s : WideString) : WideString;
  begin
    UpCase:=widestringmanager.UpperWideStringProc(s);
  end;
  { Sets S to a string of Len wide characters. When Buf is not nil and Len is
    positive, the first Len characters of Buf are copied into S; otherwise
    only the length is set. }
  Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    if (Buf=Nil) or (Len<=0) then
      exit;
    Move(Buf[0],S[1],Len*sizeof(WideChar));
  end;
  { Sets S from Len single-byte characters in Buf. The length is set first;
    when Buf is not nil and Len is positive, the conversion itself is done by
    the Ansi2WideMoveProc of the installed widestring manager. }
  Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    if (Buf=Nil) or (Len<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(Buf,S,Len);
  end;
  917. {$ifndef FPUNONE}
  { Val() helper for floating point values read from a WideString.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is converted through a short string. }
  Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    fpc_Val_Real_WideStr:=0;
    if length(S)>255 then
      begin
        code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,fpc_Val_Real_WideStr,code);
  end;
  931. {$endif}
  { Val() helper for enumeration values read from a WideString.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is converted through a short string.

    NOTE(review): str2ordindex is not used by this routine; the short string
    is handed to Val() with a longint destination. Confirm whether the
    conversion is supposed to go through the enumeration lookup table. }
  function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
  var
    ss : shortstring;
  begin
    { give the result a defined value on the error path as well, like the
      other Val helpers for WideStrings do }
    fpc_val_enum_widestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_widestr,code);
      end;
  end;
  { Val() helper for Currency values read from a WideString.
    Strings of at most 255 characters are converted through a short string;
    longer ones are rejected with Code=256 and a result of 0. }
  Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_Currency_WideStr,code);
      end
    else
      begin
        code:=256;
        fpc_Val_Currency_WideStr:=0;
      end;
  end;
  { Val() helper for unsigned integers read from a WideString.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is converted through a short string. }
  Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_UInt_WideStr:=0;
    if length(S)>255 then
      begin
        code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,fpc_Val_UInt_WideStr,code);
  end;
  { Val() helper for signed integers read from a WideString.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is converted to a short string and passed,
    together with DestSize, to int_Val_SInt_ShortStr. }
  Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_SInt_WideStr:=0;
    if length(S)>255 then
      begin
        code:=256;
        exit;
      end;
    Narrow:=S;
    fpc_Val_SInt_WideStr:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
  end;
  984. {$ifndef CPU64}
  { Val() helper for QWord values read from a WideString.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is converted through a short string. }
  Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_qword_WideStr:=0;
    if length(S)>255 then
      begin
        code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,fpc_Val_qword_WideStr,Code);
  end;
  { Val() helper for Int64 values read from a WideString.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is converted through a short string. }
  Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_int64_WideStr:=0;
    if length(S)>255 then
      begin
        code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,fpc_Val_int64_WideStr,Code);
  end;
  1011. {$endif CPU64}
  1012. {$ifndef FPUNONE}
  { Str() helper for floating point values with a WideString destination:
    str_real formats d into a short string, which is then assigned to s. }
  procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Formatted);
    s:=Formatted;
  end;
  1020. {$endif}
  { Str() helper for enumeration values with a WideString destination:
    fpc_shortstr_enum produces a short string, which is then assigned to s. }
  procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
    s:=Formatted;
  end;
  1027. {$ifdef FPC_HAS_STR_CURRENCY}
  { Str() helper for Currency values with a WideString destination:
    the value is formatted into a short string using the width len and the
    fraction digits fr, and that string is then assigned to s. }
  procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str(c:len:fr,Formatted);
    s:=Formatted;
  end;
  1035. {$endif FPC_HAS_STR_CURRENCY}
  { Str() helper for signed integers with a WideString destination:
    v is formatted with field width Len into a short string, which is then
    assigned to S. }
  Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  { Str() helper for unsigned integers with a WideString destination:
    v is formatted with field width Len into a short string, which is then
    assigned to S. }
  Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  1050. {$ifndef CPU64}
  { Str() helper for Int64 values with a WideString destination:
    v is formatted with field width Len into a short string, which is then
    assigned to S. }
  Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  { Str() helper for QWord values with a WideString destination:
    v is formatted with field width Len into a short string, which is then
    assigned to S. }
  Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  1065. {$endif CPU64}
  { Converts the UTF-16 code unit, or surrogate pair, that starts at S[index]
    to UTF-32.

    len receives the number of UTF-16 code units consumed: 2 for a valid
    surrogate pair, 1 otherwise. A surrogate that is not part of a valid
    pair is returned unchanged. }
  function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
  var
    lead,trail: widechar;
  begin
    lead:=s[index];
    { default: one code unit that maps to itself. This covers
      #$0-#$D7FF and #$E000-#$FFFF as well as unpaired surrogates. }
    result:=UCS4Char(lead);
    len:=1;
    { a high surrogate that is followed by another code unit? }
    if (lead>=#$d800) and
       (lead<=#$dbff) and
       (index<length(s)) then
      begin
        trail:=s[index+1];
        if (trail>=#$dc00) and
           (trail<=#$dfff) then
          begin
            { valid pair: combine both halves }
            result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
            len:=2;
          end;
      end;
  end;
  { Convenience overload for #0-terminated input: the source length is
    determined with IndexWord and the conversion is delegated to the
    four-parameter UnicodeToUtf8. Returns 0 when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source<>nil then
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
  end;
  { Converts SourceChars UTF-16 code units from Source to UTF-8.

    Dest         : output buffer, or nil to only compute the required size.
    MaxDestBytes : capacity of Dest in bytes, including room for the
                   terminating #0.
    Source       : UTF-16 input; when nil the function returns 0.
    SourceChars  : number of UTF-16 code units to convert.

    Returns the number of bytes written (or, when Dest is nil, needed)
    plus one for the terminating #0.

    Code units that are not part of a valid surrogate pair (a high
    surrogate without a following low surrogate, or a lone low surrogate)
    produce no output. When Dest is given, conversion stops as soon as the
    next sequence would not fit, and the output is #0-terminated as long as
    MaxDestBytes is not zero. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
  var
    i,j : SizeUInt;
    w : word;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            w:=word(Source[i]);
            case w of
              0..$7f:
                begin
                  Dest[j]:=char(w);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (w shr 6));
                  Dest[j+1]:=char($80 or (w and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (w shr 12));
                  Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                  Dest[j+2]:=char($80 or (w and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                { high surrogate }
                begin
                  if j+3>=MaxDestBytes then
                    break;
                  if (i+1<SourceChars) and
                     (word(Source[i+1])>=$dc00) and
                     (word(Source[i+1])<=$dfff) then
                    begin
                      { combine the pair directly; no temporary string is
                        needed for two code units that are already validated }
                      lw:=(longword(w)-$d800) shl 10 + (longword(word(Source[i+1]))-$dc00) + $10000;
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { skip the low surrogate as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;

        { Terminate the output. With MaxDestBytes=0 there is no room at all,
          and MaxDestBytes-1 would wrap around, so nothing may be written. }
        if MaxDestBytes>0 then
          begin
            if j>MaxDestBytes-1 then
              j:=MaxDestBytes-1;
            Dest[j]:=#0;
          end;
      end
    else
      begin
        { size computation only }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i+1<SourceChars) and
                     (word(Source[i+1])>=$dc00) and
                     (word(Source[i+1])<=$dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    result:=j+1;
  end;
  { Convenience overload for #0-terminated input: the source length is
    determined with strlen and the conversion is delegated to the
    four-parameter Utf8ToUnicode. Returns 0 when Source is nil. }
  function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source<>nil then
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
  end;
  { Converts SourceBytes bytes of UTF-8 from Source to UTF-16.

    Dest         : output buffer, or nil to only count the UTF-16 code units
                   the input would produce.
    MaxDestChars : capacity of Dest in UTF-16 code units.
    Source       : UTF-8 input; when nil the function returns 0.
    SourceBytes  : number of input bytes.

    Returns the number of UTF-16 code units written (or, when Dest is nil,
    needed) plus one. No terminating #0 is stored by this routine.

    Invalid input (stray continuation bytes, truncated or overlong
    sequences, encoded surrogates, values above $10FFFF, 5..7 byte forms)
    is replaced by '?' (UNICODE_INVALID). A code point outside the BMP
    becomes a surrogate pair; when only one code unit of room is left in
    Dest, the sequence is skipped without storing anything. }
  function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  const
    UNICODE_INVALID=63;
  var
    InputUTF8: SizeUInt;
    OutputUnicode: SizeUInt;
    IBYTE: BYTE;
    CharLen: SizeUInt;
    UC: SizeUInt;

    { Decodes the multi-byte sequence whose lead byte is Source[InputUTF8].
      SeqLen receives the number of input bytes the caller has to skip.
      The result is the decoded code point ($10000..$10FFFF for a valid
      four byte sequence), UNICODE_INVALID for a malformed sequence, or
      $FFFF for an eight byte form, which no case below handles. }
    function DecodeSequence(out SeqLen: SizeUInt): SizeUInt;
    var
      TempBYTE: BYTE;
      LookAhead: SizeUInt;
      CodePoint: SizeUInt;
    begin
      { the number of leading 1 bits of the lead byte is the claimed length }
      TempBYTE:=byte(Source[InputUTF8]);
      SeqLen:=0;
      while (TempBYTE and $80)<>0 do
        begin
          TempBYTE:=(TempBYTE shl 1) and $FE;
          inc(SeqLen);
        end;
      { Not enough input left for the claimed length: fall back to a single
        byte. The sequence occupies InputUTF8..InputUTF8+SeqLen-1, so it fits
        only if InputUTF8+SeqLen<=SourceBytes; comparing with SeqLen-1 would
        allow a read of Source[SourceBytes], one byte past the input. }
      if InputUTF8+SeqLen>SourceBytes then
        SeqLen:=1;
      { every following byte must match the 10xxxxxx pattern }
      for LookAhead:=1 to SeqLen-1 do
        if (byte(Source[InputUTF8+LookAhead]) and $C0)<>$80 then
          begin
            { invalid sequence: only consume the bytes that did match }
            SeqLen:=LookAhead;
            break;
          end;
      CodePoint:=$FFFF;
      case SeqLen of
        1:
          { stray continuation byte or broken sequence }
          CodePoint:=UNICODE_INVALID;
        2:
          begin
            CodePoint:=(byte(Source[InputUTF8]) and $1F) shl 6;
            CodePoint:=CodePoint or (byte(Source[InputUTF8+1]) and $3F);
            { overlong encoding }
            if CodePoint<=$7F then
              CodePoint:=UNICODE_INVALID;
          end;
        3:
          begin
            CodePoint:=(byte(Source[InputUTF8]) and $0F) shl 12;
            CodePoint:=CodePoint or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
            CodePoint:=CodePoint or (byte(Source[InputUTF8+2]) and $3F);
            { overlong encoding, $FFFE/$FFFF, or an encoded surrogate }
            if (CodePoint<=$7FF) or (CodePoint>=$FFFE) or
               ((CodePoint>=$D800) and (CodePoint<=$DFFF)) then
              CodePoint:=UNICODE_INVALID;
          end;
        4:
          begin
            CodePoint:=(byte(Source[InputUTF8]) and $07) shl 18;
            CodePoint:=CodePoint or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
            CodePoint:=CodePoint or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
            CodePoint:=CodePoint or (byte(Source[InputUTF8+3]) and $3F);
            { overlong encoding or beyond the Unicode range }
            if (CodePoint<$10000) or (CodePoint>$10FFFF) then
              CodePoint:=UNICODE_INVALID;
          end;
        5,6,7:
          { these forms cannot be represented in UTF-16 }
          CodePoint:=UNICODE_INVALID;
      end;
      DecodeSequence:=CodePoint;
    end;

  begin
    if not assigned(Source) then
      begin
        result:=0;
        exit;
      end;
    InputUTF8:=0;
    OutputUnicode:=0;
    if Assigned(Dest) then
      begin
        while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80)=0 then
              begin
                { US-ASCII maps one to one }
                Dest[OutputUnicode]:=WideChar(IBYTE);
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                UC:=DecodeSequence(CharLen);
                if UC>=$10000 then
                  begin
                    { needs a surrogate pair: only store it if both halves fit }
                    dec(UC,$10000);
                    if OutputUnicode<MaxDestChars-1 then
                      begin
                        Dest[OutputUnicode]:=WideChar((UC shr 10) + $D800);
                        inc(OutputUnicode);
                        Dest[OutputUnicode]:=WideChar((UC and $3ff) + $DC00);
                        inc(OutputUnicode);
                      end;
                  end
                else
                  begin
                    Dest[OutputUnicode]:=WideChar(UC);
                    inc(OutputUnicode);
                  end;
                inc(InputUTF8,CharLen);
              end;
          end;
      end
    else
      begin
        { counting only }
        while InputUTF8<SourceBytes do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80)=0 then
              begin
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                UC:=DecodeSequence(CharLen);
                if UC>=$10000 then
                  { surrogate pair }
                  inc(OutputUnicode,2)
                else
                  inc(OutputUnicode);
                inc(InputUTF8,CharLen);
              end;
          end;
      end;
    Result:=OutputUnicode+1;
  end;
  { Returns s encoded as UTF-8. An empty input gives an empty result.
    The work buffer is sized at three bytes per UTF-16 code unit and is cut
    down to the length reported by UnicodeToUtf8 afterwards. }
  function UTF8Encode(const s : WideString) : UTF8String;
  var
    Produced : SizeInt;
    Buffer : UTF8String;
  begin
    result:='';
    if s='' then
      exit;
    SetLength(Buffer,length(s)*3);
    { +1: the buffer of a string has room for the terminating #0 }
    Produced:=UnicodeToUtf8(pchar(Buffer),length(Buffer)+1,PWideChar(s),length(s));
    if Produced<=0 then
      exit;
    { the returned count includes the terminating #0 }
    SetLength(Buffer,Produced-1);
    result:=Buffer;
  end;
  const
    { messages written by unimplementedwidestring }
    SNoWidestrings = 'This binary has no widestrings support compiled in.';
    SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';

  { Reports a call to a widestring routine for which no implementation is
    available: when console I/O is compiled in and the program is a console
    application, both messages above are written to StdErr; in every case
    run-time error 233 is raised for the caller's frame. }
  procedure unimplementedwidestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      if IsConsole then
        begin
          Writeln(StdErr,SNoWidestrings);
          Writeln(StdErr,SRecompileWithWidestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorFrame(233,get_frame);
    end;
  { Placeholder implementations that only report the missing widestring
    support through unimplementedwidestring. None of them assigns a function
    result; presumably that is why warnings are switched off around them. }
  {$warnings off}

  { placeholder for the case conversion callbacks }
  function GenericWideCase(const s : WideString) : WideString;
    begin
      unimplementedwidestring;
    end;

  { placeholder for the case sensitive comparison callback }
  function CompareWideString(const s1, s2 : WideString) : PtrInt;
    begin
      unimplementedwidestring;
    end;

  { placeholder for the case insensitive comparison callback }
  function CompareTextWideString(const s1, s2 : WideString): PtrInt;
    begin
      unimplementedwidestring;
    end;

  {$warnings on}
  { implemented further down; declared here so that its address can be
    taken in initwidestringmanager }
  function CharLengthPChar(const Str: PChar): PtrInt;forward;

  { Resets the widestring manager record to zero and installs the default
    callbacks. The comparison and character length callbacks are always set;
    the move and case conversion callbacks only when HAS_WIDESTRINGMANAGER
    is not defined. }
  procedure initwidestringmanager;
    begin
      fillchar(widestringmanager,sizeof(widestringmanager),0);
      { callbacks that are installed unconditionally }
      widestringmanager.CharLengthPCharProc:=@CharLengthPChar;
      widestringmanager.CompareWideStringProc:=@CompareWideString;
      widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
  {$ifndef HAS_WIDESTRINGMANAGER}
      { default conversion routines and placeholder case conversion }
      widestringmanager.Wide2AnsiMoveProc:=@defaultWide2AnsiMove;
      widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2WideMove;
      widestringmanager.UpperWideStringProc:=@GenericWideCase;
      widestringmanager.LowerWideStringProc:=@GenericWideCase;
  {$endif HAS_WIDESTRINGMANAGER}
    end;