wstrings.inc 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for WideStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  {
    This file contains the implementation of the WideString type,
    and all things that are needed for it.
    WideString is defined as a 'silent' pwidechar :
    a pwidechar that points to :

    @-4 : DWord for size; size=number of bytes, not the number of chars. Divide or multiply
          with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
          Windows COM BSTR.
    @   : String + Terminating #0;
    Pwidechar(Widestring) is a valid typecast.
    So WS[i] is converted to the address @WS+i-1.

    Historical note: this comment used to list a SizeInt reference count at @-8
    and stated that constants should be assigned a reference count of -1,
    meaning that they can't be disposed of. The TWideRec header declared in
    this file only contains the size field, so no reference count is stored.
  }
  type
    { Header stored in front of the character data of a heap allocated
      WideString. }
    PWideRec = ^TWideRec;
    TWideRec = packed record
      Len   : DWord;     { size of the character data in bytes }
      First : WideChar;  { first character of the string data }
    end;

  const
    { Size of the complete header record, First included. }
    WideRecLen   = SizeOf(TWideRec);
    { Offset of the First field, i.e. of the string data, inside the record. }
    WideFirstOff = SizeOf(TWideRec)-SizeOf(WideChar);
  {
    Default WideChar <-> Char conversion: only code units below 256 are
    converted one-to-one, every other WideChar is replaced by '?'.
    These routines can be overridden for the current locale.
  }
  procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback WideChar -> Char conversion.
    dest is resized to len characters. Every source code unit below 256 is
    stored as the char with the same ordinal value, all others become '?'.
    The cp argument is not used by this implementation.
  }
  var
    remaining : SizeInt;
    outp      : PChar;
    code      : word;
  begin
    setlength(dest,len);
    outp:=PChar(Pointer(dest));
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  procedure DefaultAnsi2WideMove(source:pchar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
  {
    Fallback Char -> WideChar conversion.
    dest is resized to len characters and every source byte is widened to the
    WideChar with the same ordinal value. The cp argument is not used here.
  }
  var
    idx : SizeInt;
  begin
    setlength(dest,len);
    for idx:=0 to len-1 do
      dest[idx+1]:=widechar(byte(source[idx]));
  end;
  69. (*
  70. Procedure UniqueWideString(Var S : WideString); [Public,Alias : 'FPC_WIDESTR_UNIQUE'];
  71. {
  72. Make sure reference count of S is 1,
  73. using copy-on-write semantics.
  74. }
  75. begin
  76. end;
  77. *)
  78. {****************************************************************************
  79. Internal functions, not in interface.
  80. ****************************************************************************}
  procedure WideStringError;
  { Raises run-time error 204 through HandleErrorFrame, passing the frame
    obtained from get_frame. }
  const
    WideStringErrorCode = 204;
  begin
    HandleErrorFrame(WideStringErrorCode,get_frame);
  end;
  {$ifdef WideStrDebug}
  Procedure DumpWideRec(S : Pointer);
  {
    Debug helper: prints the header of the widestring whose character data
    starts at S, or a notice when S is nil.
    TWideRec only declares Len and First, so only the length is printed;
    the former 'Ref' output referred to a field that does not exist.
  }
  begin
    if S=nil then
      Writeln('String is nil')
    else
      Writeln('(Len:',PWideRec(S-WideFirstOff)^.Len,')');
  end;
  {$endif}
  Function NewWideString(Len : SizeInt) : Pointer;
  {
    Allocates storage for a WideString of Len characters and returns a
    pointer to its first character.
    With MSWINDOWS defined and winwidestringalloc set, the memory comes from
    SysAllocStringLen. Otherwise a TWideRec header plus Len characters are
    taken from the heap, the header length is set to Len*2 bytes and the
    first character is set to #0.
    WideStringError is called when the allocation yields nil.
  }
  var
    Block : Pointer;
  begin
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        Block:=SysAllocStringLen(nil,Len);
        if Block=nil then
          WideStringError;
      end
    else
  {$endif MSWINDOWS}
      begin
        GetMem(Block,WideRecLen+Len*sizeof(WideChar));
        if Block=nil then
          WideStringError
        else
          begin
            PWideRec(Block)^.First:=#0;   { Terminating #0 }
            PWideRec(Block)^.Len:=Len*2;  { Length in bytes }
            inc(Block,WideFirstOff);      { Skip the header: points to the string now }
          end;
      end;
    NewWideString:=Block;
  end;
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
  {
    Releases the widestring S points to and sets S to nil.
    Nothing is counted here: a non-nil S is freed right away, either with
    SysFreeString (MSWINDOWS with winwidestringalloc set) or with Freemem on
    the start of its TWideRec header. A nil S is left alone.
  }
  Begin
    if S<>nil then
      begin
  {$ifdef MSWINDOWS}
        if winwidestringalloc then
          SysFreeString(S)
        else
  {$endif MSWINDOWS}
          begin
            { step back from the character data to the start of the header }
            Dec(S,WideFirstOff);
            Freemem(S);
          end;
        S:=nil;
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
  Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
  {
    Replaces S by a freshly allocated copy of the widestring it points to
    (characters plus terminating #0). A nil S is left unchanged.
  }
  var
    Duplicate : pointer;
    CharCount : SizeInt;
  Begin
    if S=nil then
      exit;
    CharCount:=length(WideString(S));
    Duplicate:=NewWidestring(CharCount);
    { +1: copy the terminating #0 as well }
    move(S^,Duplicate^,(CharCount+1)*sizeof(widechar));
    S:=Duplicate;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WideStr_To_ShortStr (high_of_res: SizeInt;const S2 : WideString): shortstring;[Public, alias: 'FPC_WIDESTR_TO_SHORTSTR']; compilerproc;
  {
    Converts a WideString to a ShortString.
    At most high_of_res characters of S2 are handed to the widestring
    manager; an empty S2 yields an empty result.
  }
  Var
    Size : SizeInt;
    temp : ansistring;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        If Size>high_of_res then
          Size:=high_of_res;
        { Wide2AnsiMoveProc takes (source, dest, code page, length); the code
          page argument was missing from this call }
        widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a WideString to a ShortString.
    At most high(res) characters of S2 are converted through the widestring
    manager using DefaultSystemCodePage; an empty S2 gives an empty res.
  }
  var
    CharCount : SizeInt;
    converted : ansistring;
  begin
    res:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if CharCount>high(res) then
      CharCount:=high(res);
    widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),converted,DefaultSystemCodePage,CharCount);
    res:=converted;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
  {
    Converts a ShortString to a WideString using DefaultSystemCodePage.
    An empty S2 yields an empty result.
  }
  Var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      { The move procedure sizes the result itself, and SetLength already
        writes the terminating #0. The number of WideChars produced does not
        have to equal Size, so the former explicit terminator write at offset
        Size could land outside the allocated string and has been removed. }
      widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  end;
  Function fpc_WideStr_To_AnsiStr (const S2 : WideString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a WideString to an AnsiString.
    Without FPC_HAS_CPSTRING the target code page is DefaultSystemCodePage;
    with it, a cp of CP_NONE or 0 is replaced by DefaultSystemCodePage.
    An empty S2 yields an empty result.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if (cp=0) or (cp=CP_NONE) then
      cp:=DefaultSystemCodePage;
    widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,cp,CharCount);
  end;
  Function fpc_AnsiStr_To_WideStr (Const S2 : RawByteString): WideString; compilerproc;
  {
    Converts an AnsiString to a WideString.
    The source code page is StringCodePage(S2); CP_NONE or 0 is replaced by
    DefaultSystemCodePage. An empty S2 yields an empty result.
  }
  var
    ByteCount : SizeInt;
    SourceCP  : TSystemCodePage;
  begin
    result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    SourceCP:=StringCodePage(S2);
    if (SourceCP=0) or (SourceCP=CP_NONE) then
      SourceCP:=DefaultSystemCodePage;
    widestringmanager.Ansi2WideMoveProc(PChar(S2),SourceCP,result,ByteCount);
  end;
  Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
  {
    Copies the zero terminated wide character sequence at p into a new
    WideString. A nil p yields an empty string.
  }
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        { position of the first zero word = number of characters }
        CharCount:=IndexWord(p^,-1,0);
        Setlength(result,CharCount);
        if CharCount>0 then
          begin
            Move(p^,PWideChar(Pointer(result))^,CharCount*sizeof(WideChar));
            { Terminating Zero }
            PWideChar(Pointer(result))[CharCount]:=#0;
          end;
      end;
  end;
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2) by copying the characters.
    Identical pointers are a no-op; a nil S2 releases S1 and leaves it nil.
    With MSWINDOWS defined and winwidestringalloc set the copy is made with
    SysReAllocStringLen (WideStringError when it returns 0); otherwise S1 is
    resized and the characters plus terminating #0 are moved over.
  }
  begin
    if S1=S2 then
      exit;
    if S2=nil then
      begin
        { Free S1 }
        fpc_widestr_decr_ref(S1);
        S1:=nil;
        exit;
      end;
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        if SysReAllocStringLen(S1,S2,Length(WideString(S2)))=0 then
          WideStringError;
      end
    else
  {$endif MSWINDOWS}
      begin
        SetLength(WideString(S1),length(WideString(S2)));
        { +1: the terminating #0 is copied too }
        move(S2^,S1^,(length(WideString(S1))+1)*sizeof(widechar));
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
  {$ifndef STR_CONCAT_PROCS}
  function fpc_WideStr_Concat (const S1,S2 : WideString): WideString; compilerproc;
  {
    Returns S1 followed by S2. When one operand is empty the other one is
    simply assigned to the result.
  }
  var
    Len1,Len2 : SizeInt;
    dst       : pwidechar;
  begin
    if S1='' then
      result:=S2
    else if S2='' then
      result:=S1
    else
      begin
        Len1:=Length(S1);
        Len2:=Length(S2);
        SetLength(result,Len1+Len2);
        dst:=pwidechar(result);
        Move(S1[1],dst^,Len1*sizeof(WideChar));
        { Len2+1: take the terminating #0 of S2 along }
        Move(S2[1],dst[Len1],(Len2+1)*sizeof(WideChar));
      end;
  end;
  function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring; compilerproc;
  {
    Returns the concatenation of all strings in sarr, in array order.
  }
  var
    idx            : Longint;
    piece          : pointer;
    dst            : pwidechar;
    PieceLen,Total : SizeInt;
  begin
    { Sum the lengths first so a single SetLength() call is enough }
    Total:=0;
    for idx:=low(sarr) to high(sarr) do
      Total:=Total+length(sarr[idx]);
    SetLength(result,Total);
    dst:=pwidechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        piece:=pointer(sarr[idx]);
        { empty strings are nil pointers: nothing to copy }
        if piece=nil then
          continue;
        PieceLen:=length(widestring(piece));
        { the #0 copied along is overwritten by the next piece }
        Move(pwidechar(piece)^,dst^,(PieceLen+1)*sizeof(WideChar));
        inc(dst,PieceLen);
      end;
  end;
  {$else STR_CONCAT_PROCS}
  procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc;
  {
    Stores S1 followed by S2 in DestS. DestS may be the same string as S1
    and/or S2; those cases are detected by comparing the string pointers and
    handled by moving data inside the resized destination.
  }
  var
    Len1,Len2  : SizeInt;
    SelfAppend : boolean;
    Tail       : Pointer;
  begin
    { only assign if s1 or s2 is empty }
    if S1='' then
      begin
        DestS:=S2;
        exit;
      end;
    if S2='' then
      begin
        DestS:=S1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=Length(S2);
    { Pointer() typecasts prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { remember S1=S2 before DestS gets resized }
        SelfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        if SelfAppend then
          Move(Pointer(DestS)^,Tail^,Len2*sizeof(WideChar))
        else
          Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(WideChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        SetLength(DestS,Len1+Len2);
        { shift the old contents (S2) behind the room needed for S1 first }
        Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(WideChar))^,(Len2+1)*sizeof(WideChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
      end
    else
      begin
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
        Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(WideChar))^,(Len2+1)*sizeof(WideChar));
      end;
  end;
  procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
  {
    Stores the concatenation of all strings in sarr, in array order, in
    DestS. The result is built in a temporary first, so DestS may also be
    one of the elements of sarr.
  }
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    DestTmp     : Widestring;
  begin
    { high() of an empty open array is -1. The former test 'high(sarr)=0'
      matched a one-element array instead and discarded that element. }
    if high(sarr)<0 then
      begin
        DestS:='';
        exit;
      end;
    { First calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestTmp,NewLen);
    pc:=pwidechar(DestTmp);
    for i:=low(sarr) to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(widestring(p));
            { Size+1: the #0 copied along is overwritten by the next piece }
            Move(p^,pc^,(Size+1)*sizeof(WideChar));
            { pc is an untyped pointer, so advance it in bytes }
            inc(pc,size*sizeof(WideChar));
          end;
      end;
    DestS:=DestTmp;
  end;
  {$endif STR_CONCAT_PROCS}
  Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
  {
    Returns a WideString of length 1 holding c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
    { Terminating Zero, stored right behind the single character }
    PWideChar(Pointer(result))[1]:=#0;
  end;
  Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  {
    Returns a WideString of length 1 holding c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  Function fpc_WChar_To_AnsiStr(const c : WideChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a WideChar to an AnsiString.
    The target code page is chosen exactly as in fpc_WideStr_To_AnsiStr:
    DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined, otherwise cp
    with CP_NONE and 0 replaced by DefaultSystemCodePage. Before, cp (or a
    literal 0) was handed to the widestring manager unchecked.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    if (cp=CP_NONE) or (cp=0) then
      cp:=DefaultSystemCodePage;
    widestringmanager.Wide2AnsiMoveProc(@c,fpc_WChar_To_AnsiStr,cp,1);
  end;
  Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  {
    Returns a WideString of length 1 holding c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
  {
    Converts the zero terminated string at p to a WideString using
    DefaultSystemCodePage. A nil p or a p pointing at #0 yields ''.
  }
  var
    TextLen : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { position of the first #0 = number of chars to convert }
        TextLen:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2WideMoveProc(p,DefaultSystemCodePage,result,TextLen);
      end
    else
      result:='';
  end;
  Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
  {
    Converts a char array to a WideString using DefaultSystemCodePage.
    With zerobased set, conversion stops at the first #0 in arr (the whole
    array when there is none); otherwise the whole array is converted.
    An empty array, or a zero-based array starting with #0, yields ''.
  }
  var
    i : SizeInt;
  begin
    i:=high(arr)+1;
    { Only search for #0 when there is at least one element; the former
      unconditional arr[0] test read past an empty open array. }
    if zerobased and (i>0) then
      begin
        i:=IndexChar(arr,high(arr)+1,#0);
        if i=-1 then
          i:=high(arr)+1;
      end;
    if i<=0 then
      begin
        fpc_CharArray_To_WideStr:='';
        exit;
      end;
    SetLength(fpc_CharArray_To_WideStr,i);
    widestringmanager.Ansi2WideMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_WideStr,i);
  end;
  {$ifndef FPC_STRTOCHARARRAYPROC}
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM) }
  function fpc_widestr_to_chararray(arraysize: SizeInt; const src: WideString): fpc_big_chararray;[public,alias: 'FPC_WIDESTR_TO_CHARARRAY']; compilerproc;
  {
    Converts src with the widestring manager and copies at most arraysize
    chars of the converted text into the result; the rest of the first
    arraysize elements is filled with zero bytes.
  }
  var
    len: SizeInt;
    temp: ansistring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { Wide2AnsiMoveProc takes (source, dest, code page, length); the code
        page argument was missing from this call }
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,DefaultSystemCodePage,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    { do not index temp when nothing was converted }
    if len > 0 then
      move(temp[1],fpc_widestr_to_chararray[0],len);
    fillchar(fpc_widestr_to_chararray[len],arraysize-len,0);
  {$pop}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { widechararray we're converting to (JM) }
  function fpc_widestr_to_widechararray(arraysize: SizeInt; const src: WideString): fpc_big_widechararray;[public,alias: 'FPC_WIDESTR_TO_WIDECHARARRAY']; compilerproc;
  {
    Copies at most arraysize characters of src into the result and fills
    the remainder of the first arraysize elements with zeroes.
  }
  var
    CopyCount : SizeInt;
  begin
    { CopyCount = smaller of length(src) and arraysize }
    CopyCount:=arraysize;
    if length(src)<CopyCount then
      CopyCount:=length(src);
  {$push}
  {$r-}
    { src[1] must not be touched when src is empty }
    if CopyCount>0 then
      move(src[1],fpc_widestr_to_widechararray[0],CopyCount*SizeOf(WideChar));
    fillchar(fpc_widestr_to_widechararray[CopyCount],(arraysize-CopyCount)*SizeOf(WideChar),0);
  {$pop}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { widechararray we're converting to (JM) }
  function fpc_ansistr_to_widechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_widechararray;[public,alias: 'FPC_ANSISTR_TO_WIDECHARARRAY']; compilerproc;
  {
    Converts src with the widestring manager and copies at most arraysize
    wide characters of the converted text into the result; the rest of the
    first arraysize elements is filled with zeroes.
  }
  var
    len: SizeInt;
    temp: widestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { Ansi2WideMoveProc takes (source, code page, dest, length); the code
        page argument was missing from this call }
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    { do not index temp when nothing was converted }
    if len > 0 then
      move(temp[1],fpc_ansistr_to_widechararray[0],len*sizeof(widechar));
    fillchar(fpc_ansistr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  {$pop}
  end;
  function fpc_shortstr_to_widechararray(arraysize: SizeInt; const src: ShortString): fpc_big_widechararray;[public,alias: 'FPC_SHORTSTR_TO_WIDECHARARRAY']; compilerproc;
  {
    Converts src with the widestring manager and copies at most arraysize
    wide characters of the converted text into the result; the rest of the
    first arraysize elements is filled with zeroes.
  }
  var
    len: longint;
    temp : widestring;
  begin
    len := length(src);
    { make sure we don't access char 1 if length is 0 (JM) }
    if len > 0 then
      { Ansi2WideMoveProc takes (source, code page, dest, length); the code
        page argument was missing from this call }
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    { do not index temp when nothing was converted }
    if len > 0 then
      move(temp[1],fpc_shortstr_to_widechararray[0],len*sizeof(widechar));
    fillchar(fpc_shortstr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  {$pop}
  end;
  {$else ndef FPC_STRTOCHARARRAYPROC}
  procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc;
  {
    Converts src with the widestring manager (DefaultSystemCodePage) and
    copies as many chars of the converted text as fit into res; the rest of
    res is filled with zero bytes.
  }
  var
    CopyCount,Capacity : SizeInt;
    converted          : ansistring;
  begin
    { src must not be dereferenced when it is empty (JM) }
    if length(src)>0 then
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),converted,DefaultSystemCodePage,length(src));
    Capacity:=length(res);
    CopyCount:=length(converted);
    if CopyCount>Capacity then
      CopyCount:=Capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],CopyCount);
    fillchar(res[CopyCount],Capacity-CopyCount,0);
  {$pop}
  end;
  procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc;
  {
    Copies as many characters of src as fit into res and fills the rest of
    res with zeroes.
  }
  var
    CopyCount,Capacity : SizeInt;
  begin
    Capacity:=length(res);
    { CopyCount = smaller of length(src) and the capacity of res }
    CopyCount:=Capacity;
    if length(src)<CopyCount then
      CopyCount:=length(src);
  {$push}
  {$r-}
    { src[1] must not be touched when src is empty }
    if CopyCount>0 then
      move(src[1],res[0],CopyCount*SizeOf(WideChar));
    fillchar(res[CopyCount],(Capacity-CopyCount)*SizeOf(WideChar),0);
  {$pop}
  end;
  {$endif ndef FPC_STRTOCHARARRAYPROC}
  Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
  {
    Compares two WideStrings word by word over their common length; when
    that part is equal the difference of the lengths decides.
    Result: <0 if S1<S2, 0 if S1=S2, >0 if S1>S2.
  }
  var
    Len1,Len2,Common,Diff : SizeInt;
  begin
    { same string data: equal without looking at the characters }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1<Len2 then
      Common:=Len1
    else
      Common:=Len2;
    Diff:=CompareWord(S1[1],S2[1],Common);
    if Diff=0 then
      Diff:=Len1-Len2;
    fpc_WideStr_Compare:=Diff;
  end;
  Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
  {
    Tests two WideStrings for equality only.
    Result: 0 if S1=S2, <>0 otherwise (-1 when the lengths differ, else the
    CompareWord result).
  }
  var
    Len1 : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      result:=0
    else
      begin
        Len1:=Length(S1);
        if Len1=Length(S2) then
          result:=CompareWord(S1[1],S2[1],Len1)
        else
          result:=-1;
      end;
  end;
  {$ifdef VER2_4}
  // obsolete but needed for bootstrapping with 2.4
  Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc;
  { Raises run-time error 201 when p is nil. }
  begin
    if p<>nil then
      exit;
    HandleErrorFrame(201,get_frame);
  end;

  Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  { Raises run-time error 201 unless 1 <= index <= len div 2
    (len is divided by two before the comparison). }
  begin
    if (index>=1) and (index<=len div 2) then
      exit;
    HandleErrorFrame(201,get_frame);
  end;
  {$else VER2_4}
  Procedure fpc_WideStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  { Raises run-time error 201 when p is nil or index lies outside
    1..(length stored in the TWideRec header in front of p) div 2. }
  begin
    if (p<>nil) and (index>=1) and (index<=PWideRec(p-WideFirstOff)^.len div 2) then
      exit;
    HandleErrorFrame(201,get_frame);
  end;
  {$endif VER2_4}
  Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l characters.
    l<=0 releases S and leaves it nil. A nil S gets a new string. An
    existing string is either grown in place with reallocmem (only when the
    block is too small) or, with MSWINDOWS defined and winwidestringalloc
    set, replaced by a new allocation that receives the old contents.
    The result is always zero terminated at position l.
  }
  var
    Fresh     : Pointer;
    CopyCount : SizeInt;
    InPlace   : boolean;
  begin
    if l<=0 then
      begin
        { Length=0 }
        if Pointer(S)<>nil then
          fpc_widestr_decr_ref(Pointer(S));
        Pointer(S):=nil;
        exit;
      end;
    { windows doesn't support reallocing widestrings, this code
      is anyways subject to be removed because widestrings shouldn't be
      ref. counted anymore (FK) }
  {$ifdef MSWINDOWS}
    InPlace:=not winwidestringalloc;
  {$else MSWINDOWS}
    InPlace:=True;
  {$endif MSWINDOWS}
    if Pointer(S)=nil then
      { Need a complete new string... }
      Pointer(S):=NewWideString(l)
    else if InPlace then
      begin
        { work on the start of the header while resizing the block }
        Dec(Pointer(S),WideFirstOff);
        if SizeUInt(l*sizeof(WideChar)+WideRecLen)>MemSize(Pointer(S)) then
          reallocmem(Pointer(S),l*sizeof(WideChar)+WideRecLen);
        Inc(Pointer(S),WideFirstOff);
      end
    else
      begin
        { Reallocation is needed... }
        Fresh:=Pointer(NewWideString(l));
        if Length(S)>0 then
          begin
            { old contents including the terminating null, but never more
              than l characters }
            CopyCount:=succ(length(S));
            if l<CopyCount then
              CopyCount:=l;
            Move(Pointer(S)^,Fresh^,CopyCount*Sizeof(WideChar));
          end;
        fpc_widestr_decr_ref(Pointer(S));
        Pointer(S):=Fresh;
      end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
  {$ifdef MSWINDOWS}
    if not winwidestringalloc then
  {$endif MSWINDOWS}
      PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar);
  end;
  731. {*****************************************************************************
  732. Public functions, In interface.
  733. *****************************************************************************}
  Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc;
  { Returns S unchanged; no copy is made. }
  begin
    fpc_widestr_Unique:=S;
  end;
  Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc;
  {
    Returns up to Size characters of S starting at the 1-based position
    Index. An Index below 1 is treated as 1. Size is limited to the
    characters available from Index on; when nothing is left the result is
    an empty string.
  }
  var
    ResultAddress : Pointer;
    Avail         : SizeInt;
  begin
    ResultAddress:=Nil;
    { switch to a 0-based index without ever decrementing below the
      smallest SizeInt }
    if Index<1 then
      Index:=0
    else
      dec(Index);
    { Limit Size to what is available from Index on. Both Index and
      Length(S) are >=0, so this subtraction cannot overflow, unlike the
      former 'Index+Size>Length(S)' test, which wrapped around for very
      large values and then let the copy read past the end of S. }
    Avail:=Length(S)-Index;
    if Size>Avail then
      Size:=Avail;
    If Size>0 then
      begin
        ResultAddress:=Pointer(NewWideString(Size));
        if ResultAddress<>Nil then
          begin
            Move(PWideChar(S)[Index],ResultAddress^,Size*sizeof(WideChar));
            PWideRec(ResultAddress-WideFirstOff)^.Len:=Size*sizeof(WideChar);
            { Terminating Zero }
            PWideChar(ResultAddress+Size*sizeof(WideChar))^:=#0;
          end;
      end;
    { release whatever the result variable held before taking over the copy }
    fpc_widestr_decr_ref(Pointer(fpc_widestr_copy));
    Pointer(fpc_widestr_Copy):=ResultAddress;
  end;
  Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when Substr is empty or does not occur.
  }
  var
    Start,LastStart,SubLen : SizeInt;
    cur                    : pwidechar;
  begin
    Pos:=0;
    SubLen:=Length(SubStr);
    if SubLen<=0 then
      exit;
    { last position at which Substr still fits into Source }
    LastStart:=Length(Source)-SubLen+1;
    cur:=@Source[1];
    for Start:=1 to LastStart do
      begin
        { cheap first character test before comparing the whole substring }
        if (SubStr[1]=cur^) and
           (CompareWord(SubStr[1],cur^,SubLen)=0) then
          begin
            Pos:=Start;
            exit;
          end;
        inc(cur);
      end;
  end;
  { Faster version for a widechar alone }
  Function Pos (c : WideChar; Const s : WideString) : SizeInt;
  { Returns the 1-based position of the first c in s, or 0 when s does not
    contain c. }
  var
    idx : SizeInt;
    cur : pwidechar;
  begin
    result:=0;
    cur:=@s[1];
    for idx:=1 to length(s) do
      begin
        if cur^=c then
          begin
            result:=idx;
            break;
          end;
        inc(cur);
      end;
  end;
  Function Pos (c : WideChar; Const s : RawByteString) : SizeInt;
  { Converts s to a WideString and searches c in the converted string. }
  begin
    Pos:=Pos(c,WideString(s));
  end;
  { Searches a WideString for a byte-string pattern: c is converted to a
    WideString and the (WideString, WideString) overload does the search. }
  Function Pos (c : RawByteString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(WideString(c),s);
  end;
  { Searches a WideString for a shortstring pattern: c is converted to a
    WideString and the (WideString, WideString) overload does the search. }
  Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(WideString(c),s);
  end;
  { Searches a byte string for a WideString pattern: s is converted to a
    WideString and the (WideString, WideString) overload does the search. }
  Function Pos (c : WideString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(c,WideString(s));
  end;
  { Faster version for a char alone. Must be implemented because       }
  { pos(c: char; const s: shortstring) also exists, so otherwise       }
  { using pos(char,pchar) will always call the shortstring version     }
  { (exact match for first argument), also with $h+ (JM)               }
  { Returns the 1-based position of the first c in s, or 0 if absent.  }
  Function Pos (c : Char; Const s : WideString) : SizeInt;
  var
    Idx : SizeInt;
    Wanted : widechar;
    First : pwidechar;
  begin
    Pos:=0;
    { widen the needle once instead of on every comparison }
    Wanted:=c;
    First:=@s[1];
    for Idx:=1 to length(s) do
      if First[Idx-1]=Wanted then
        begin
          Pos:=Idx;
          exit;
        end;
  end;
  { Removes up to Size characters from S, starting at the 1-based position Index.
    Nothing happens when Index lies outside 1..Length(S) or Size is not positive.
    A Size that reaches past the end of S deletes everything from Index onwards. }
  Procedure Delete (Var S : WideString; Index,Size: SizeInt);
  Var
    OldLen,Tail : SizeInt;
  begin
    OldLen:=Length(S);
    if (Index<=0) or (Index>OldLen) or (Size<=0) then
      exit;
    UniqueString (S);
    { number of characters located after position Index;
      comparing against it avoids Size+Index, which overflows for Size=MaxInt }
    Tail:=OldLen-Index;
    if Size>Tail then
      { delete through the end of the string: nothing has to be moved }
      Size:=Tail+1
    else
      { close the gap; the count includes the terminating #0 }
      Move(PWideChar(S)[Index-1+Size],PWideChar(S)[Index-1],(Tail-Size+2)*sizeof(WideChar));
    Setlength(s,OldLen-Size);
  end;
  { Inserts Source into S in front of the 1-based position Index.
    Index is clamped to 1..Length(S)+1, so an Index past the end appends.
    An empty Source leaves S untouched. }
  Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt);
  var
    Merged : WideString;
    SrcLen,DstLen,Tail : SizeInt;
  begin
    SrcLen:=Length(Source);
    if SrcLen=0 then
      exit;
    if index<=0 then
      index:=1;
    DstLen:=Length(S);
    if index>DstLen then
      index:=DstLen+1;
    { from here on Index is the 0-based insertion offset }
    Dec(Index);
    Pointer(Merged):=NewWideString(SrcLen+DstLen);
    SetLength(Merged,SrcLen+DstLen);
    { head of S, then Source, then the tail of S }
    if Index>0 then
      Move(PWideChar(S)^,PWideChar(Merged)^,Index*sizeof(WideChar));
    Move(PWideChar(Source)^,PWideChar(Merged)[Index],SrcLen*sizeof(WideChar));
    Tail:=DstLen-Index;
    if Tail>0 then
      Move(PWideChar(S)[Index],PWideChar(Merged)[SrcLen+Index],Tail*sizeof(WideChar));
    S:=Merged;
  end;
  { Returns s converted by the UpperWideStringProc handler of the installed
    widestring manager. }
  function UpCase(const s : WideString) : WideString;
  begin
    UpCase:=widestringmanager.UpperWideStringProc(s);
  end;
  { Sets S to a string of Len characters.  When Buf is assigned and Len is
    positive, the first Len widechars of Buf are copied into it; otherwise the
    content of S is only resized. }
  Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>Nil) then
      Move(Buf^,S[1],Len*sizeof(WideChar));
  end;
  { Sets S from Len single-byte characters at Buf.  When Buf is assigned and
    Len is positive, the conversion is done by the widestring manager's
    Ansi2WideMoveProc using DefaultSystemCodePage; otherwise S is only resized. }
  Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>Nil) then
      widestringmanager.Ansi2WideMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  903. {$ifndef FPUNONE}
  { Compiler helper for Val() with a WideString source and a real destination.
    Strings longer than 255 characters are rejected with Code=256 and a result
    of 0; shorter ones are converted to a narrow string and parsed by Val. }
  Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    fpc_Val_Real_WideStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_Real_WideStr,code);
      end
    else
      code:=256;
  end;
  917. {$endif}
  { Compiler helper for Val() with a WideString source and an enumeration
    destination.  Strings longer than 255 characters are rejected with Code=256;
    shorter ones are converted to a shortstring and parsed by Val.
    The result is now always defined: it is 0 when the string is rejected,
    matching the other fpc_Val_*_WideStr helpers.
    NOTE(review): str2ordindex is not used by this routine - confirm that
    parsing through the plain Val overload is intended. }
  function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
  var
    ss : shortstring;
  begin
    { never hand back an uninitialised value on the error path }
    fpc_val_enum_widestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_widestr,code);
      end;
  end;
  { Compiler helper for Val() with a WideString source and a Currency
    destination.  Strings longer than 255 characters yield 0 with Code=256;
    shorter ones are converted to a narrow string and parsed by Val. }
  Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_Currency_WideStr,code);
      end
    else
      begin
        fpc_Val_Currency_WideStr:=0;
        code:=256;
      end;
  end;
  { Compiler helper for Val() with a WideString source and an unsigned
    destination.  Strings longer than 255 characters yield 0 with Code=256;
    shorter ones are converted to a shortstring and parsed by Val. }
  Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_UInt_WideStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_UInt_WideStr,code);
      end
    else
      code:=256;
  end;
  { Compiler helper for Val() with a WideString source and a signed destination
    of DestSize bytes.  Strings longer than 255 characters yield 0 with
    Code=256; shorter ones are converted to a shortstring and passed on to
    int_Val_SInt_ShortStr. }
  Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_SInt_WideStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        fpc_Val_SInt_WideStr:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      code:=256;
  end;
  970. {$ifndef CPU64}
  { Compiler helper for Val() with a WideString source and a qword destination
    (only compiled when CPU64 is not defined).  Strings longer than 255
    characters yield 0 with Code=256; shorter ones are converted to a
    shortstring and parsed by Val. }
  Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_qword_WideStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_qword_WideStr,Code);
      end
    else
      code:=256;
  end;
  { Compiler helper for Val() with a WideString source and an Int64 destination
    (only compiled when CPU64 is not defined).  Strings longer than 255
    characters yield 0 with Code=256; shorter ones are converted to a
    shortstring and parsed by Val. }
  Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    fpc_Val_int64_WideStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_int64_WideStr,Code);
      end
    else
      code:=256;
  end;
  997. {$endif CPU64}
  998. {$ifndef FPUNONE}
  { Compiler helper for Str() of a real value into a WideString: the value is
    formatted into a shortstring by str_real and the text is then assigned to s. }
  procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Formatted);
    s:=Formatted;
  end;
  1006. {$endif}
  { Compiler helper for Str() of an enumeration value into a WideString: the
    text is produced by fpc_shortstr_enum and then assigned to s. }
  procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
    s:=Formatted;
  end;
  { Compiler helper for Str() of a boolean into a WideString: the text is
    produced by fpc_shortstr_bool and then assigned to s. }
  procedure fpc_widestr_bool(b : boolean;len:sizeint;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_bool(b,len,Formatted);
    s:=Formatted;
  end;
  1019. {$ifdef FPC_HAS_STR_CURRENCY}
  { Compiler helper for Str() of a Currency value into a WideString: the value
    is formatted with width len and fr decimals into a shortstring, which is
    then assigned to s. }
  procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str(c:len:fr,Formatted);
    s:=Formatted;
  end;
  1027. {$endif FPC_HAS_STR_CURRENCY}
  { Compiler helper for Str() of a signed integer into a WideString: the value
    is formatted with width Len into a shortstring, which is then assigned to S. }
  Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  { Compiler helper for Str() of an unsigned integer into a WideString: the
    value is formatted with width Len into a shortstring, which is then
    assigned to S. }
  Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  1042. {$ifndef CPU64}
  { Compiler helper for Str() of an Int64 into a WideString (only compiled when
    CPU64 is not defined): the value is formatted with width Len into a
    shortstring, which is then assigned to S. }
  Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  { Compiler helper for Str() of a Qword into a WideString (only compiled when
    CPU64 is not defined): the value is formatted with width Len into a
    shortstring, which is then assigned to S. }
  Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  1057. {$endif CPU64}
  { Converts the UTF-16 code unit at S[index], or the surrogate pair starting
    there, to UTF-32.
    len receives the number of UTF-16 code units consumed: 2 for a valid
    surrogate pair, 1 otherwise.  A surrogate that is not part of a valid pair
    is returned unchanged. }
  function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
  var
    Lead,Trail : widechar;
  begin
    Lead:=s[index];
    { default: code units in #$0-#$D7FF and #$E000-#$FFFF are identical in
      UTF-32, and invalid surrogates are passed through as they are }
    result:=UCS4Char(Lead);
    len:=1;
    { a high surrogate that is followed by another code unit? }
    if (Lead>=#$d800) and
       (Lead<=#$dbff) and
       (index<length(s)) then
      begin
        Trail:=s[index+1];
        if (Trail>=#$dc00) and
           (Trail<=#$dfff) then
          begin
            { combine the surrogate pair into one UTF-32 code point }
            result:=(UCS4Char(Lead)-$d800) shl 10 + (UCS4Char(Trail)-$dc00) + $10000;
            len:=2;
          end;
      end;
  end;
  { Converts the #0-terminated UTF-16 string at Source to UTF-8 in Dest, using
    at most MaxBytes bytes.  The source length is found by scanning for the
    terminating zero word.  Returns 0 when Source is nil, otherwise the result
    of the length-based UnicodeToUtf8 overload. }
  function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if not assigned(Source) then
      Result:=0
    else
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
  end;
  { Converts SourceChars UTF-16 code units at Source to UTF-8.

    Dest assigned: at most MaxDestBytes bytes are written, including a
      terminating #0.  Conversion stops before the first character that no
      longer fits.  If the output fills the buffer completely, the terminator
      replaces the last byte written.  With MaxDestBytes=0 nothing is written
      at all (the previous code stored the terminator at Dest[0] even then).
    Dest nil: nothing is written; only the required size is computed.

    Surrogates that are not part of a valid high/low pair produce no output.

    Returns 0 when Source is nil, otherwise the number of bytes produced (or
    required) plus one. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
  var
    i,j : SizeUInt;
    w : word;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            w:=word(Source[i]);
            case w of
              0..$7f:
                begin
                  Dest[j]:=char(w);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (w shr 6));
                  Dest[j+1]:=char($80 or (w and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (w shr 12));
                  Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                  Dest[j+2]:=char($80 or (w and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  if j+3>=MaxDestBytes then
                    break;
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { combine the pair directly; no temporary string needed }
                      lw:=(longword(w)-$d800) shl 10 + (longword(word(Source[i+1]))-$dc00) + $10000;
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;

        { Terminate the output.  MaxDestBytes-1 would wrap around for an empty
          buffer, so that case must not touch Dest at all. }
        if MaxDestBytes>0 then
          begin
            if j>MaxDestBytes-1 then
              j:=MaxDestBytes-1;
            Dest[j]:=#0;
          end;
      end
    else
      begin
        { size calculation only }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    result:=j+1;
  end;
  { Converts the #0-terminated UTF-8 string at Source to UTF-16 in Dest, using
    at most MaxChars widechars.  The source length is taken from strlen.
    Returns 0 when Source is nil, otherwise the result of the length-based
    Utf8ToUnicode overload. }
  function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if not assigned(Source) then
      Result:=0
    else
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
  end;
  { Decodes the multi-byte UTF-8 sequence whose lead byte (high bit set) is
    Source[Start].

    CharLen receives the number of source bytes the sequence occupies (always
    at least 1).  UC receives the decoded value:
      - the code point for a well-formed 2, 3 or 4 byte sequence
        (a value >= $10000 is only ever returned for a valid 4 byte sequence),
      - 63 ('?') for anything invalid: stray continuation bytes, overlong
        forms, encoded surrogates, $FFFE/$FFFF, values above $10FFFF and
        5..7 byte forms,
      - $FFFF for a claimed length of 8 (lead byte $FF).

    A sequence that claims more bytes than are left in the buffer is treated
    as a single invalid byte.  A sequence cut short by a non-continuation byte
    is decoded using only the bytes seen so far. }
  procedure Utf8DecodeMultiByte(Source: PChar; Start,SourceBytes: SizeUInt; out CharLen,UC: SizeUInt);
  const
    UNICODE_INVALID=63;
  var
    Lead : Byte;
    LookAhead : SizeUInt;
  begin
    { the number of leading 1 bits of the lead byte is the claimed length }
    Lead:=byte(Source[Start]);
    CharLen:=0;
    while (Lead and $80)<>0 do
      begin
        Lead:=(Lead shl 1) and $FE;
        inc(CharLen);
      end;
    { Not enough bytes left to hold the claimed sequence: fall back to a single
      char.  The last byte of the sequence is Source[Start+CharLen-1], so the
      sequence fits only if Start+CharLen<=SourceBytes. }
    if Start+CharLen>SourceBytes then
      CharLen:=1;
    { every following byte must match the 10xxxxxx pattern }
    for LookAhead:=1 to CharLen-1 do
      if (byte(Source[Start+LookAhead]) and $C0)<>$80 then
        begin
          { invalid UTF-8 sequence, fall back to the bytes seen so far }
          CharLen:=LookAhead;
          break;
        end;
    UC:=$FFFF;
    case CharLen of
      1:
        { not a valid UTF-8 sequence }
        UC:=UNICODE_INVALID;
      2:
        begin
          UC:=((byte(Source[Start]) and $1F) shl 6) or
              (byte(Source[Start+1]) and $3F);
          { overlong encoding }
          if UC<=$7F then
            UC:=UNICODE_INVALID;
        end;
      3:
        begin
          UC:=((byte(Source[Start]) and $0F) shl 12) or
              ((byte(Source[Start+1]) and $3F) shl 6) or
              (byte(Source[Start+2]) and $3F);
          { overlong encoding, $FFFE/$FFFF or an encoded surrogate }
          if (UC<=$7FF) or (UC>=$FFFE) or ((UC>=$D800) and (UC<=$DFFF)) then
            UC:=UNICODE_INVALID;
        end;
      4:
        begin
          UC:=((byte(Source[Start]) and $07) shl 18) or
              ((byte(Source[Start+1]) and $3F) shl 12) or
              ((byte(Source[Start+2]) and $3F) shl 6) or
              (byte(Source[Start+3]) and $3F);
          { overlong encoding or outside the Unicode range }
          if (UC<$10000) or (UC>$10FFFF) then
            UC:=UNICODE_INVALID;
        end;
      5,6,7:
        { cannot be represented in UTF-16, mark as invalid }
        UC:=UNICODE_INVALID;
    end;
  end;

  { Converts SourceBytes bytes of UTF-8 at Source to UTF-16.

    Dest assigned: at most MaxDestChars widechars are stored; no terminating
      #0 is written by this routine.  A code point that needs a surrogate pair
      is skipped when only one widechar of room is left.
    Dest nil: nothing is stored; only the number of widechars is counted
      (MaxDestChars is ignored).

    Invalid input bytes are replaced by '?'.  Line feeds are copied as they
    are; they are not expanded to CR/LF.

    Returns 0 when Source is nil, otherwise the number of widechars produced
    (or counted) plus one. }
  function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  var
    InputUTF8 : SizeUInt;
    OutputUnicode : SizeUInt;
    CharLen : SizeUInt;
    UC : SizeUInt;
    IBYTE : Byte;
  begin
    if not assigned(Source) then
      begin
        result:=0;
        exit;
      end;
    InputUTF8:=0;
    OutputUnicode:=0;
    if Assigned(Dest) then
      begin
        while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80)=0 then
              begin
                { one byte US-ASCII, maps 1:1 }
                Dest[OutputUnicode]:=WideChar(IBYTE);
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                Utf8DecodeMultiByte(Source,InputUTF8,SourceBytes,CharLen,UC);
                inc(InputUTF8,CharLen);
                if UC>=$10000 then
                  begin
                    { four byte sequence: becomes a surrogate pair,
                      stored only if there is room for both halves }
                    dec(UC,$10000);
                    if OutputUnicode<MaxDestChars-1 then
                      begin
                        Dest[OutputUnicode]:=WideChar((UC shr 10) + $D800);
                        Dest[OutputUnicode+1]:=WideChar((UC and $3ff) + $DC00);
                        inc(OutputUnicode,2);
                      end;
                  end
                else
                  begin
                    Dest[OutputUnicode]:=WideChar(UC);
                    inc(OutputUnicode);
                  end;
              end;
          end;
      end
    else
      begin
        { size calculation only }
        while InputUTF8<SourceBytes do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80)=0 then
              begin
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                Utf8DecodeMultiByte(Source,InputUTF8,SourceBytes,CharLen,UC);
                inc(InputUTF8,CharLen);
                if UC>=$10000 then
                  { surrogate pair }
                  inc(OutputUnicode,2)
                else
                  inc(OutputUnicode);
              end;
          end;
      end;
    Result:=OutputUnicode+1;
  end;
  { Returns s encoded as UTF-8.  An empty s gives an empty result.
    The work buffer is sized at three bytes per UTF-16 code unit and trimmed to
    the size reported by UnicodeToUtf8 (which counts one extra byte). }
  function UTF8Encode(const s : WideString) : RawByteString;
  var
    Written : SizeInt;
    Buf : UTF8String;
  begin
    result:='';
    if not (s='') then
      begin
        SetLength(Buf,length(s)*3);
        { length(Buf)+1: the terminating #0 of the string may be used too }
        Written:=UnicodeToUtf8(pchar(Buf),length(Buf)+1,PWideChar(s),length(s));
        if Written>0 then
          begin
            SetLength(Buf,Written-1);
            result:=Buf;
          end;
      end;
  end;
  const
    { messages written by unimplementedwidestring }
    SNoWidestrings = 'This binary has no widestrings support compiled in.';
    SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';

  { Common body of the widestring manager stubs below: when console I/O is
    compiled in and the program is a console application, both messages are
    written to StdErr; then runtime error 233 is raised for the caller's frame. }
  procedure unimplementedwidestring;
  begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
    if IsConsole then
      begin
        Writeln(StdErr,SNoWidestrings);
        Writeln(StdErr,SRecompileWithWidestrings);
      end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
    HandleErrorFrame(233,get_frame);
  end;
  1505. {$warnings off}
  { Stub installed by initwidestringmanager for upper and lower casing when
    HAS_WIDESTRINGMANAGER is not defined.  It only calls
    unimplementedwidestring and assigns no result. }
  function GenericWideCase(const s : WideString) : WideString;
    begin
      unimplementedwidestring;
    end;
  { Stub installed by initwidestringmanager as CompareWideStringProc.
    It only calls unimplementedwidestring and assigns no result. }
  function CompareWideString(const s1, s2 : WideString) : PtrInt;
    begin
      unimplementedwidestring;
    end;
  { Stub installed by initwidestringmanager as CompareTextWideStringProc.
    It only calls unimplementedwidestring and assigns no result. }
  function CompareTextWideString(const s1, s2 : WideString): PtrInt;
    begin
      unimplementedwidestring;
    end;
  1518. {$warnings on}
  { Forward declarations: initwidestringmanager takes the address of both
    routines; their bodies are not part of this section. }
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;forward;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;forward;
  { Resets widestringmanager to all zeroes and installs the default handlers.
    The comparison and character-length handlers are always set; the move and
    case-conversion handlers are set only when HAS_WIDESTRINGMANAGER is not
    defined. }
  procedure initwidestringmanager;
  begin
    fillchar(widestringmanager,sizeof(widestringmanager),0);
    { handlers that are installed unconditionally }
    widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
    widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
    widestringmanager.CompareWideStringProc:=@CompareWideString;
    widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
  {$ifndef HAS_WIDESTRINGMANAGER}
    { conversion and casing defaults }
    widestringmanager.Wide2AnsiMoveProc:=@DefaultWide2AnsiMove;
    widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
    widestringmanager.UpperWideStringProc:=@GenericWideCase;
    widestringmanager.LowerWideStringProc:=@GenericWideCase;
  {$endif HAS_WIDESTRINGMANAGER}
  end;