wstrings.inc 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for WideStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {
  13. This file contains the implementation of the WideString type,
  14. and all things that are needed for it.
  15. WideString is defined as a 'silent' pwidechar :
  16. a pwidechar that points to :
  17. @-8 : SizeInt for reference count;
  18. @-4 : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply
  19. with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
  20. Windows COM BSTR.
  21. @ : String + Terminating #0;
  22. Pwidechar(Widestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  { Header record that precedes the character data of a heap allocated
    widestring, plus the size constants derived from it. }
  type
    PWideRec = ^TWideRec;
    TWideRec = packed record
      Len   : DWord;      { payload size in bytes (the allocator stores char count * 2) }
      First : WideChar;   { first character of the payload }
    end;

  const
    { Size of the whole header record, First included. }
    WideRecLen   = SizeOf(TWideRec);
    { Offset of First inside the record; string pointers point at First. }
    WideFirstOff = SizeOf(TWideRec)-SizeOf(WideChar);
  {
    Default WideChar <-> Char conversion: wide to ansi only converts
    characters whose code is below 256, all others are translated to '?';
    ansi to wide widens every byte unchanged.
    These routines can be overwritten for the Current Locale
  }
  { Fallback wide -> ansi conversion.
    Resizes dest to len characters and fills it from source: codes below 256
    are copied as a single char, everything else becomes '?'.
    The cp argument is not consulted by this implementation. }
  procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;cp : TSystemCodePage;len:SizeInt);
  var
    remaining : SizeInt;
    outp      : PChar;
    code      : word;
  begin
    SetLength(dest,len);
    outp:=PChar(Pointer(dest));
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  { Fallback ansi -> wide conversion.
    Resizes dest to len characters and widens each source byte to a WideChar
    with the same code. The cp argument is not consulted here. }
  procedure DefaultAnsi2WideMove(source:pchar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
  var
    remaining : SizeInt;
    outp      : PWideChar;
  begin
    SetLength(dest,len);
    outp:=PWideChar(Pointer(dest));
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=widechar(byte(source^));
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  69. (*
  70. Procedure UniqueWideString(Var S : WideString); [Public,Alias : 'FPC_WIDESTR_UNIQUE'];
  71. {
  72. Make sure reference count of S is 1,
  73. using copy-on-write semantics.
  74. }
  75. begin
  76. end;
  77. *)
  78. {****************************************************************************
  79. Internal functions, not in interface.
  80. ****************************************************************************}
  { Reports error 204 through HandleErrorFrame, passing the current frame.
    Used by the allocation routines of this file when no memory was obtained. }
  procedure WideStringError;
  const
    WideStringErrorCode = 204;
  begin
    HandleErrorFrame(WideStringErrorCode,get_frame);
  end;
  {$ifdef WideStrDebug}
  { Debug helper: prints the header of the widestring whose character data
    starts at S, or a notice when S is nil.
    TWideRec only carries a Len field, so only the length is printed; the
    previous version also printed a non-existent "ref" field and therefore
    could not compile with WideStrDebug defined. }
  Procedure DumpWideRec(S : Pointer);
  begin
    if S=nil then
      Writeln ('String is nil')
    else
      { step back from the character data to the start of the header }
      Writeln ('(Len:',PWideRec(S-WideFirstOff)^.Len,')');
  end;
  {$endif}
  { Allocates storage for a widestring of Len characters and returns a
    pointer to its character data.
    On Windows with winwidestringalloc set the string comes from
    SysAllocStringLen; otherwise it is taken from the heap, the header is
    filled in (Len = byte size, First = #0) and the returned pointer is moved
    past the header. WideStringError is called when no memory was obtained. }
  Function NewWideString(Len : SizeInt) : Pointer;
  Var
    Block : Pointer;
  begin
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        Block:=SysAllocStringLen(nil,Len);
        if Block=nil then
          WideStringError;
        NewWideString:=Block;
        exit;
      end;
  {$endif MSWINDOWS}
    GetMem(Block,Len*sizeof(WideChar)+WideRecLen);
    if Block=nil then
      WideStringError
    else
      begin
        PWideRec(Block)^.Len:=Len*sizeof(WideChar);  { length is stored in bytes }
        PWideRec(Block)^.First:=#0;                  { terminating #0 }
        inc(Block,WideFirstOff);                     { now points at the characters }
      end;
    NewWideString:=Block;
  end;
  { Releases the widestring whose character data starts at S and sets S to
    nil. A nil S is ignored. Strings obtained from the Windows allocator are
    returned with SysFreeString, heap strings with FreeMem on the header. }
  Procedure DisposeWideString(Var S : Pointer);
  begin
    if S<>nil then
      begin
  {$ifdef MSWINDOWS}
        if winwidestringalloc then
          SysFreeString(S)
        else
  {$endif MSWINDOWS}
          begin
            { the heap block starts at the header, not at the characters }
            Dec(S,WideFirstOff);
            FreeMem(S);
          end;
        S:=nil;
      end;
  end;
  { Compiler helper exported as FPC_WIDESTR_DECR_REF.
    In this implementation it simply disposes the string (if any) and leaves
    S nil. }
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
  Begin
    if S<>nil then
      DisposeWideString(S);
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
  { Compiler helper exported as FPC_WIDESTR_INCR_REF.
    Replaces a non-nil S by a freshly allocated copy of the string,
    terminator included. A nil S is left untouched. }
  Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
  var
    Dup       : pointer;
    CharCount : SizeInt;
  Begin
    if S<>nil then
      begin
        CharCount:=length(WideString(S));
        Dup:=NewWidestring(CharCount);
        { +1 so the terminating #0 is copied as well }
        move(S^,Dup^,(CharCount+1)*sizeof(widechar));
        S:=Dup;
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
  173. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  { Converts a WideString to a ShortString of at most high_of_res characters.
    At most high_of_res wide characters of S2 are handed to the widestring
    manager's Wide2AnsiMoveProc; an empty S2 yields an empty result.
    The conversion is now called with DefaultSystemCodePage: the previous call
    passed only three arguments and omitted the code page that every other
    Wide2AnsiMoveProc call in this file supplies. }
  function fpc_WideStr_To_ShortStr (high_of_res: SizeInt;const S2 : WideString): shortstring;[Public, alias: 'FPC_WIDESTR_TO_SHORTSTR']; compilerproc;
  Var
    Size : SizeInt;
    temp : ansistring;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        { never convert more characters than the destination can hold }
        If Size>high_of_res then
          Size:=high_of_res;
        widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  192. {$else FPC_STRTOSHORTSTRINGPROC}
  { Converts a WideString to the ShortString res.
    No more than high(res) wide characters are converted, using the
    widestring manager's Wide2AnsiMoveProc with DefaultSystemCodePage.
    An empty S2 yields an empty res. }
  procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
  Var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if CharCount>high(res) then
      CharCount:=high(res);
    widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),Converted,DefaultSystemCodePage,CharCount);
    res:=Converted;
  end;
  211. {$endif FPC_STRTOSHORTSTRINGPROC}
  { Converts a ShortString to a WideString through the widestring manager's
    Ansi2WideMoveProc, using DefaultSystemCodePage. An empty S2 yields ''.
    The former manual write of a terminating #0 at offset Size was removed:
    Size is the number of source bytes, so if the installed converter yields
    fewer wide characters than source bytes that write would land past the
    end of the result. fpc_WideStr_SetLength already terminates the string. }
  Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
  Var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  end;
  { Converts a WideString to an AnsiString.
    The code page is the cp parameter when FPC_HAS_CPSTRING is defined and
    $ffff otherwise; $ffff is replaced by DefaultSystemCodePage before the
    widestring manager's Wide2AnsiMoveProc is called. Empty input gives ''. }
  Function fpc_WideStr_To_AnsiStr (const S2 : WideString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  Var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=$ffff;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if cp=$ffff then
      cp:=DefaultSystemCodePage;
    widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,cp,CharCount);
  end;
  { Converts an AnsiString to a WideString.
    The source code page is taken from StringCodePage(S2), with $ffff mapped
    to DefaultSystemCodePage, and handed to the widestring manager's
    Ansi2WideMoveProc. Empty input gives ''. }
  Function fpc_AnsiStr_To_WideStr (Const S2 : RawByteString): WideString; compilerproc;
  Var
    ByteCount : SizeInt;
    SourceCP  : TSystemCodePage;
  begin
    result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    SourceCP:=StringCodePage(S2);
    if SourceCP=$ffff then
      SourceCP:=DefaultSystemCodePage;
    widestringmanager.Ansi2WideMoveProc(PChar(S2),SourceCP,result,ByteCount);
  end;
  { Builds a WideString from a #0 terminated wide character buffer.
    A nil pointer gives ''. The length is found with IndexWord, the
    characters are copied and the terminator is written explicitly. }
  Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^,-1,0);
        SetLength(result,CharCount);
        if CharCount>0 then
          begin
            Move(p^,PWideChar(Pointer(result))^,CharCount*sizeof(WideChar));
            { Terminating Zero }
            PWideChar(Pointer(result))[CharCount]:=#0;
          end;
      end;
  end;
  284. { checked against the ansistring routine, 2001-05-27 (FK) }
  { Compiler helper exported as FPC_WIDESTR_ASSIGN: performs S1:=S2.
    Nothing happens when both pointers are equal. A nil S2 disposes S1.
    Otherwise S1 receives a copy of S2's characters: through
    SysReAllocStringLen when the Windows allocator is active, else through
    SetLength followed by a move that includes the terminator. }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
  begin
    if S1=S2 then
      exit;
    if S2=nil then
      begin
        { Free S1 }
        fpc_widestr_decr_ref (S1);
        S1:=nil;
        exit;
      end;
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        if SysReAllocStringLen(S1, S2, Length(WideString(S2))) = 0 then
          WideStringError;
        exit;
      end;
  {$endif MSWINDOWS}
    SetLength(WideString(S1),length(WideString(S2)));
    move(S2^,S1^,(length(WideString(S1))+1)*sizeof(widechar));
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
  315. {$ifndef STR_CONCAT_PROCS}
  { Returns S1 followed by S2.
    When one operand is empty the other one is simply assigned; otherwise the
    result is sized once and both operands are copied into it, the second one
    together with its terminating #0. }
  function fpc_WideStr_Concat (const S1,S2 : WideString): WideString; compilerproc;
  Var
    Len1,Len2 : SizeInt;
    Dst       : pwidechar;
  begin
    if (S1='') then
      result:=S2
    else if (S2='') then
      result:=S1
    else
      begin
        Len1:=Length(S1);
        Len2:=Length(S2);
        SetLength(result,Len1+Len2);
        Dst:=pwidechar(result);
        Move(S1[1],Dst^,Len1*sizeof(WideChar));
        inc(Dst,Len1);
        { +1 copies the terminator of S2 too }
        Move(S2[1],Dst^,(Len2+1)*sizeof(WideChar));
      end;
  end;
  { Returns the concatenation of all strings in sarr.
    The total length is computed first so that the result is sized with a
    single SetLength; every non-empty element is then copied including its
    terminator, which the next element overwrites. }
  function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring; compilerproc;
  Var
    idx            : Longint;
    piece          : pointer;
    dst            : pwidechar;
    pieceLen,total : SizeInt;
  begin
    total:=0;
    for idx:=low(sarr) to high(sarr) do
      total:=total+length(sarr[idx]);
    SetLength(result,total);
    dst:=pwidechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        piece:=pointer(sarr[idx]);
        if not assigned(piece) then
          continue;
        pieceLen:=length(widestring(piece));
        Move(pwidechar(piece)^,dst^,(pieceLen+1)*sizeof(WideChar));
        inc(dst,pieceLen);
      end;
  end;
  365. {$else STR_CONCAT_PROCS}
  { Stores S1 followed by S2 in DestS.
    DestS may be the same string as S1 and/or S2; those cases grow DestS and
    move the data inside it instead of starting from an empty string.
    Pointer() typecasts are used to prevent extra conversion code. }
  procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc;
  Var
    Len1,Len2  : SizeInt;
    SelfAppend : boolean;
    Tail       : Pointer;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=S2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=S1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Pointer(DestS)=Pointer(S1) then
      begin
        { must be determined before DestS is resized }
        SelfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        if SelfAppend then
          Move(Pointer(DestS)^,Tail^,Len2*sizeof(WideChar))
        else
          Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(WideChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        { shift the old contents (S2) up, then put S1 in front of it }
        Move(Pointer(DestS)^,Tail^,(Len2+1)*sizeof(WideChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
      end
    else
      begin
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
        Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(WideChar));
      end;
  end;
  { Stores the concatenation of all strings in sarr in DestS.
    The result is assembled in a temporary so that DestS may also occur in
    sarr. An empty array yields ''.
    The early exit used to test high(sarr)=0, which is true for an array with
    exactly one element and made that case return '' instead of the element;
    it now only triggers for an empty array (high(sarr)<0). }
  procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    DestTmp     : Widestring;
  begin
    if high(sarr)<0 then
      begin
        DestS:='';
        exit;
      end;
    { First calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestTmp,NewLen);
    pc:=pwidechar(DestTmp);
    for i:=low(sarr) to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(widestring(p));
            { copy including the terminator; the next piece overwrites it }
            Move(p^,pc^,(Size+1)*sizeof(WideChar));
            { pc is an untyped pointer, so advance in bytes }
            inc(pc,size*sizeof(WideChar));
          end;
      end;
    DestS:=DestTmp;
  end;
  439. {$endif STR_CONCAT_PROCS}
  { Converts a Char to a WideString of length one and writes the
    terminating #0 behind it. }
  Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
  begin
    SetLength(result,1);
    result[1]:=c;
    { Terminating Zero }
    PWideChar(Pointer(result))[1]:=#0;
  end;
  { Converts a WideChar to a WideString holding exactly that character. }
  Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  begin
    SetLength(result,1);
    result[1]:=c;
  end;
  { Converts a single WideChar to an AnsiString through the widestring
    manager's Wide2AnsiMoveProc. The code page is cp when FPC_HAS_CPSTRING is
    defined and TSystemCodePage(0) otherwise. }
  Function fpc_WChar_To_AnsiStr(const c : WideChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  begin
  {$ifdef FPC_HAS_CPSTRING}
    widestringmanager.Wide2AnsiMoveProc(@c, fpc_WChar_To_AnsiStr, cp, 1);
  {$else FPC_HAS_CPSTRING}
    widestringmanager.Wide2AnsiMoveProc(@c, fpc_WChar_To_AnsiStr, TSystemCodePage(0), 1);
  {$endif FPC_HAS_CPSTRING}
  end;
  { Converts a WideChar to a WideString holding exactly that character. }
  Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  begin
    SetLength(result,1);
    result[1]:=c;
  end;
  { Converts a #0 terminated PChar to a WideString using the widestring
    manager's Ansi2WideMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty buffer gives ''. }
  Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
  Var
    ByteCount : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        ByteCount:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2WideMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_WideStr,ByteCount);
      end
    else
      fpc_pchar_to_widestr := '';
  end;
  { Converts a char array to a WideString.
    With zerobased set the array is treated as #0 terminated: conversion
    stops at the first #0, or uses the whole array when there is none.
    Otherwise the whole array is always converted. Conversion goes through
    the widestring manager's Ansi2WideMoveProc with DefaultSystemCodePage. }
  Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
  var
    Count : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            fpc_chararray_to_widestr := '';
            exit;
          end;
        Count:=IndexChar(arr,high(arr)+1,#0);
        { no terminator found: take everything }
        if Count = -1 then
          Count:=high(arr)+1;
      end;
    SetLength(fpc_CharArray_To_WideStr,Count);
    widestringmanager.Ansi2WideMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_WideStr,Count);
  end;
  505. {$ifndef FPC_STRTOCHARARRAYPROC}
  506. { inside the compiler, the resulttype is modified to that of the actual }
  507. { chararray we're converting to (JM) }
  { Converts src to ansi and copies at most arraysize characters of it into
    the result array; the remainder of the array is zero filled.
    The conversion call now passes DefaultSystemCodePage: the previous call
    had only three arguments and lacked the code page argument used by the
    other Wide2AnsiMoveProc calls in this file. }
  function fpc_widestr_to_chararray(arraysize: SizeInt; const src: WideString): fpc_big_chararray;[public,alias: 'FPC_WIDESTR_TO_CHARARRAY']; compilerproc;
  var
    len: SizeInt;
    temp: ansistring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,DefaultSystemCodePage,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_widestr_to_chararray[0],len);
    fillchar(fpc_widestr_to_chararray[len],arraysize-len,0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  527. { inside the compiler, the resulttype is modified to that of the actual }
  528. { widechararray we're converting to (JM) }
  { Copies at most arraysize wide characters of src into the result array
    and zero fills whatever is left of the array. }
  function fpc_widestr_to_widechararray(arraysize: SizeInt; const src: WideString): fpc_big_widechararray;[public,alias: 'FPC_WIDESTR_TO_WIDECHARARRAY']; compilerproc;
  var
    count: SizeInt;
  begin
    count := arraysize;
    if length(src) <= arraysize then
      count := length(src);
  {$r-}
    { element 1 of an empty string must not be accessed }
    if count > 0 then
      move(src[1],fpc_widestr_to_widechararray[0],count*SizeOf(WideChar));
    fillchar(fpc_widestr_to_widechararray[count],(arraysize-count)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  545. { inside the compiler, the resulttype is modified to that of the actual }
  546. { chararray we're converting to (JM) }
  { Converts src to wide characters and copies at most arraysize of them into
    the result array; the remainder of the array is zero filled.
    The conversion call now passes DefaultSystemCodePage as second argument:
    the previous call had only three arguments, unlike every other
    Ansi2WideMoveProc call in this file. }
  function fpc_ansistr_to_widechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_widechararray;[public,alias: 'FPC_ANSISTR_TO_WIDECHARARRAY']; compilerproc;
  var
    len: SizeInt;
    temp: widestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_ansistr_to_widechararray[0],len*sizeof(widechar));
    fillchar(fpc_ansistr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { Converts src to wide characters and copies at most arraysize of them into
    the result array; the remainder of the array is zero filled.
    The conversion call now passes DefaultSystemCodePage as second argument:
    the previous call had only three arguments, unlike every other
    Ansi2WideMoveProc call in this file. }
  function fpc_shortstr_to_widechararray(arraysize: SizeInt; const src: ShortString): fpc_big_widechararray;[public,alias: 'FPC_SHORTSTR_TO_WIDECHARARRAY']; compilerproc;
  var
    len: longint;
    temp : widestring;
  begin
    len := length(src);
    { make sure we don't access char 1 if length is 0 (JM) }
    if len > 0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_shortstr_to_widechararray[0],len*sizeof(widechar));
    fillchar(fpc_shortstr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  585. {$else ndef FPC_STRTOCHARARRAYPROC}
  { Converts src to ansi (DefaultSystemCodePage) and copies as many
    characters as fit into res; the rest of res is zero filled. }
  procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc;
  var
    count    : SizeInt;
    capacity : SizeInt;
    ansi     : ansistring;
  begin
    count := length(src);
    { src is only touched when it is not empty }
    if count > 0 then
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),ansi,DefaultSystemCodePage,count);
    capacity := length(res);
    count := length(ansi);
    if count > capacity then
      count := capacity;
  {$r-}
    move(ansi[1],res[0],count);
    fillchar(res[count],capacity-count,0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { Copies as many wide characters of src as fit into res and zero fills
    the rest of res. }
  procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc;
  var
    count    : SizeInt;
    capacity : SizeInt;
  begin
    capacity := length(res);
    count := length(src);
    if count > capacity then
      count := capacity;
  {$r-}
    { element 1 of an empty string must not be accessed }
    if count > 0 then
      move(src[1],res[0],count*SizeOf(WideChar));
    fillchar(res[count],(capacity-count)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  621. {$endif ndef FPC_STRTOCHARARRAYPROC}
  { Compares two WideStrings.
    Result is <0 if S1<S2, 0 if S1=S2 and >0 if S1>S2.
    The common prefix is compared with CompareWord; when it is equal the
    difference of the lengths decides. }
  Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
  Var
    Len1,Len2,Common : SizeInt;
  begin
    { identical pointers: same string }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1>Len2 then
      Common:=Len2
    else
      Common:=Len1;
    result:=CompareWord(S1[1],S2[1],Common);
    if result=0 then
      result:=Len1-Len2;
  end;
  { Compares two WideStrings for equality only.
    Result is 0 if S1=S2 and <>0 otherwise; strings of different length
    give -1 without looking at their contents. }
  Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
  Var
    Len : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      result:=0
    else
      begin
        Len:=Length(S1);
        if Len=Length(S2) then
          result:=CompareWord(S1[1],S2[1],Len)
        else
          result:=-1;
      end;
  end;
  665. {$ifdef VER2_4}
  666. // obsolete but needed for bootstrapping with 2.4
  { Compiler helper exported as FPC_WIDESTR_CHECKZERO: reports error 201
    through HandleErrorFrame when p is nil. }
  Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc;
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  { Compiler helper exported as FPC_WIDESTR_RANGECHECK (VER2_4 variant).
    len is halved before the comparison; error 201 is reported through
    HandleErrorFrame when index lies outside 1..len div 2. }
  Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  begin
    if (index<1) or (index>len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  677. {$else VER2_4}
  { Compiler helper exported as FPC_WIDESTR_RANGECHECK.
    Reports error 201 through HandleErrorFrame when p is nil or when index
    lies outside 1..(stored byte length div 2). The nil test comes first so
    the header is never read through a nil pointer. }
  Procedure fpc_WideStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  begin
    if (p=nil) or (index<1) or (index>PWideRec(p-WideFirstOff)^.len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  683. {$endif VER2_4}
  { Compiler helper exported as FPC_WIDESTR_SETLENGTH: sets the length of S
    to l characters.
    l<=0 disposes the string. A nil S gets a new string. An existing heap
    string is grown in place with ReallocMem when its block is too small.
    With the Windows allocator a new string is allocated, the old contents
    are copied over and the old string is released.
    In all l>0 cases a terminating #0 is written at position l, and for heap
    strings the byte length in the header is updated. }
  Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
  Var
    Fresh     : Pointer;
    CopyChars : SizeInt;
    ByteSize  : SizeInt;
    HeapStr   : boolean;
  begin
    if l<=0 then
      begin
        { Length=0 }
        if Pointer(S)<>nil then
          fpc_widestr_decr_ref (Pointer(S));
        Pointer(S):=nil;
        exit;
      end;
  {$ifdef MSWINDOWS}
    HeapStr:=not winwidestringalloc;
  {$else MSWINDOWS}
    HeapStr:=true;
  {$endif MSWINDOWS}
    if Pointer(S)=nil then
      { Need a complete new string...}
      Pointer(S):=NewWideString(l)
    else if HeapStr then
      begin
        ByteSize:=l*sizeof(WideChar)+WideRecLen;
        { the heap block starts at the header }
        Dec(Pointer(S),WideFirstOff);
        if SizeUInt(ByteSize)>MemSize(Pointer(S)) then
          reallocmem(pointer(S),ByteSize);
        Inc(Pointer(S),WideFirstOff);
      end
    else
      begin
        { windows doesn't support reallocing widestrings: allocate a new
          one and copy }
        Fresh:=Pointer(NewWideString(l));
        if Length(S)>0 then
          begin
            { also move terminating null, unless the string gets shorter }
            CopyChars:=succ(length(S));
            if l<CopyChars then
              CopyChars:=l;
            Move(Pointer(S)^,Fresh^,CopyChars*Sizeof(WideChar));
          end;
        fpc_widestr_decr_ref(Pointer(S));
        Pointer(S):=Fresh;
      end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
    if HeapStr then
      PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar);
  end;
  747. {*****************************************************************************
  748. Public functions, In interface.
  749. *****************************************************************************}
  { Compiler helper exported as FPC_WIDESTR_UNIQUE: returns S unchanged;
    no copy is made here. }
  Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc;
  begin
    fpc_widestr_Unique:=S;
  end;
  { Returns Size characters of S starting at the 1-based position Index.
    An Index below 1 is treated as 1 and Size is clipped to what is
    available; when nothing remains the result is empty. The previous
    contents of the result are released before the new string is stored. }
  Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc;
  var
    NewStr : Pointer;
    SrcLen : SizeInt;
  begin
    NewStr:=nil;
    SrcLen:=Length(S);
    { switch to a 0-based start position }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Check Size. Accounts for Zero-length S, the double check is needed because
      Size can be maxint and will get <0 when adding index }
    if (Size>SrcLen) or
       (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        NewStr:=Pointer(NewWideString(Size));
        if NewStr<>nil then
          begin
            Move(PWideChar(S)[Index],NewStr^,Size*sizeof(WideChar));
            PWideRec(NewStr-WideFirstOff)^.Len:=Size*sizeof(WideChar);
            PWideChar(NewStr+Size*sizeof(WideChar))^:=#0;
          end;
      end;
    fpc_widestr_decr_ref(Pointer(fpc_widestr_copy));
    Pointer(fpc_widestr_Copy):=NewStr;
  end;
  { Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when it does not occur or Substr is empty. }
  Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt;
  var
    Offset,LastStart,SubLen : SizeInt;
    Cursor                  : pwidechar;
  begin
    result:=0;
    SubLen:=Length(SubStr);
    if SubLen<=0 then
      exit;
    { last 0-based start position at which Substr still fits }
    LastStart:=Length(Source)-SubLen;
    Cursor:=@Source[1];
    for Offset:=0 to LastStart do
      begin
        { cheap first character test before the full comparison }
        if (SubStr[1]=Cursor^) and
           (CompareWord(Substr[1],Cursor^,SubLen)=0) then
          begin
            result:=Offset+1;
            exit;
          end;
        inc(Cursor);
      end;
  end;
  { Single-character search: returns the 1-based position of the first
    occurrence of c in s, or 0 when c does not occur. }
  Function Pos (c : WideChar; Const s : WideString) : SizeInt;
    var
      Idx, Count : SizeInt;
      Data : PWideChar;
    begin
      Pos:=0;
      Data:=@s[1];
      Count:=length(s);
      Idx:=0;
      while Idx<Count do
        begin
          if Data[Idx]=c then
            begin
              Pos:=Idx+1;
              exit;
            end;
          inc(Idx);
        end;
    end;
  { Searches a byte string for a wide character by first converting the
    string to WideString and then using the WideString search. }
  Function Pos (c : WideChar; Const s : RawByteString) : SizeInt;
    var
      Haystack : WideString;
    begin
      Haystack:=WideString(s);
      result:=Pos(c,Haystack);
    end;
  { Searches the WideString s for the byte string c; c is converted to
    WideString before the search. }
  Function Pos (c : RawByteString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    var
      Needle : WideString;
    begin
      Needle:=WideString(c);
      result:=Pos(Needle,s);
    end;
  { Searches the WideString s for the short string c; c is converted to
    WideString before the search. }
  Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    var
      Needle : WideString;
    begin
      Needle:=WideString(c);
      result:=Pos(Needle,s);
    end;
  { Searches the byte string s for the WideString c; s is converted to
    WideString before the search. }
  Function Pos (c : WideString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    var
      Haystack : WideString;
    begin
      Haystack:=WideString(s);
      result:=Pos(c,Haystack);
    end;
  { Single 8-bit character search in a WideString.
    This overload has to exist: a Pos(char, shortstring) overload exists as
    well, and since it is an exact match on the first argument it would
    otherwise always be selected for a Char argument, also with $h+ (JM).
    Returns the 1-based position of the first match, or 0 when there is none. }
  Function Pos (c : Char; Const s : WideString) : SizeInt;
    var
      Idx, Count : SizeInt;
      Wanted : WideChar;
      Data : PWideChar;
    begin
      { widen the character once, outside the loop }
      Wanted:=c;
      Data:=@s[1];
      Pos:=0;
      Count:=length(s);
      Idx:=0;
      while Idx<Count do
        begin
          if Data[Idx]=Wanted then
            begin
              Pos:=Idx+1;
              exit;
            end;
          inc(Idx);
        end;
    end;
  { Removes up to Size characters from S, starting at the 1-based position
    Index. Nothing happens when Index lies outside 1..Length(S) or Size is
    not positive. A Size reaching beyond the end of S truncates S at Index. }
  Procedure Delete (Var S : WideString; Index,Size: SizeInt);
    var
      OldLen, Tail : SizeInt;
    begin
      OldLen:=Length(S);
      if (Index<=0) or (Index>OldLen) or (Size<=0) then
        exit;
      UniqueString(S);
      { number of characters that follow position Index }
      Tail:=OldLen-Index;
      { compare against Tail instead of forming Size+Index, which would
        overflow for Size=MaxInt }
      if Size>Tail then
        Size:=Tail+1
      else
        begin
          Dec(Index);
          { the count includes one extra element so the terminating zero
            moves along with the remaining characters }
          Move(PWideChar(S)[Index+Size],PWideChar(S)[Index],(OldLen-Index-Size+1)*sizeof(WideChar));
        end;
      SetLength(S,OldLen-Size);
    end;
  { Inserts Source into S in front of the character at the 1-based position
    Index. Index is clamped to 1..Length(S)+1, so values past the end append.
    An empty Source leaves S untouched. }
  Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt);
    var
      Merged : WideString;
      OldLen, AddLen : SizeInt;
    begin
      AddLen:=Length(Source);
      if AddLen=0 then
        exit;
      OldLen:=Length(S);
      { clamp the insertion point, then make it zero-based }
      if Index<=0 then
        Index:=1
      else if Index>OldLen then
        Index:=OldLen+1;
      Dec(Index);
      Pointer(Merged):=NewWideString(AddLen+OldLen);
      SetLength(Merged,AddLen+OldLen);
      { head of S, then Source, then the remainder of S }
      if Index>0 then
        Move(PWideChar(S)^,PWideChar(Merged)^,Index*sizeof(WideChar));
      Move(PWideChar(Source)^,PWideChar(Merged)[Index],AddLen*sizeof(WideChar));
      if OldLen>Index then
        Move(PWideChar(S)[Index],PWideChar(Merged)[AddLen+Index],(OldLen-Index)*sizeof(WideChar));
      S:=Merged;
    end;
  { Upper-cases s by delegating to the UpperWideStringProc hook of the
    installed widestring manager. }
  function UpCase(const s : WideString) : WideString;
    begin
      UpCase:=widestringmanager.UpperWideStringProc(s);
    end;
  { Sets S to a string of Len characters. When Buf is not nil and Len is
    positive, the characters are copied from Buf; otherwise only the length
    is set. }
  Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt);
    begin
      SetLength(S,Len);
      if (Buf=Nil) or (Len<=0) then
        exit;
      Move(Buf^,S[1],Len*sizeof(WideChar));
    end;
  { Sets S from an 8-bit character buffer.

    S    receives the converted text
    Buf  source bytes, interpreted in DefaultSystemCodePage; may be nil
    Len  number of bytes to take from Buf

    S is first sized to Len. When Buf is not nil and Len is positive, the
    bytes are converted through the widestring manager's Ansi2WideMoveProc.
    If a zero byte is found at an offset greater than 0 and below Len, only
    the bytes in front of it are converted. }
  Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt);
    var
      { offset of the first zero byte in Buf; this declaration was missing }
      BufLen : SizeInt;
    begin
      SetLength(S,Len);
      If (Buf<>Nil) and (Len>0) then
        begin
          { NOTE(review): the scan covers Len+1 bytes, i.e. one byte more than
            requested - confirm callers guarantee that byte is readable }
          BufLen := IndexByte(Buf^, Len+1, 0);
          If (BufLen>0) and (BufLen < Len) then
            Len := BufLen;
          widestringmanager.Ansi2WideMoveProc(Buf,DefaultSystemCodePage,S,Len);
          //PWideChar(Pointer(S)+Len*sizeof(WideChar))^:=#0;
        end;
    end;
  925. {$ifndef FPUNONE}
  { Compiler helper for Val(WideString, floating point).
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0; anything else is copied to a String and parsed by Val. }
  Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
    Var
      Narrow : String;
    begin
      fpc_Val_Real_WideStr:=0;
      if length(S)>255 then
        begin
          code:=256;
          exit;
        end;
      Narrow:=S;
      Val(Narrow,fpc_Val_Real_WideStr,code);
    end;
  939. {$endif}
  { Compiler helper for Val(WideString, enumeration).

    str2ordindex  lookup data supplied by the compiler
    s             text to convert
    code          0 on success, otherwise an error position; 256 when s is
                  longer than 255 characters

    The result is now 0 whenever s is rejected for being too long, matching
    the other Val helpers in this file; it used to be left undefined.

    NOTE(review): str2ordindex is not referenced by this routine; the text is
    handed to Val with the longint result as destination - confirm that this
    is the intended conversion for enumeration names. }
  function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
    var
      ss : shortstring;
    begin
      fpc_val_enum_widestr:=0;
      if length(s)>255 then
        code:=256
      else
        begin
          ss:=s;
          val(ss,fpc_val_enum_widestr,code);
        end;
    end;
  { Compiler helper for Val(WideString, Currency).
    Strings longer than 255 characters yield 0 with Code=256; anything else
    is copied to a String and parsed by Val. }
  Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
    Var
      Narrow : String;
    begin
      if length(S)<=255 then
        begin
          Narrow:=S;
          Val(Narrow,fpc_Val_Currency_WideStr,code);
        end
      else
        begin
          fpc_Val_Currency_WideStr:=0;
          code:=256;
        end;
    end;
  { Compiler helper for Val(WideString, unsigned integer).
    Strings longer than 255 characters do not fit a ShortString and are
    rejected with Code=256 and a result of 0. }
  Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
    Var
      Narrow : ShortString;
    begin
      fpc_Val_UInt_WideStr:=0;
      if length(S)>255 then
        begin
          code:=256;
          exit;
        end;
      Narrow:=S;
      Val(Narrow,fpc_Val_UInt_WideStr,code);
    end;
  { Compiler helper for Val(WideString, signed integer).
    DestSize is passed through unchanged to int_Val_SInt_ShortStr. Strings
    longer than 255 characters are rejected with Code=256 and a result of 0. }
  Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
    Var
      Narrow : ShortString;
    begin
      fpc_Val_SInt_WideStr:=0;
      if length(S)>255 then
        begin
          code:=256;
          exit;
        end;
      Narrow:=S;
      fpc_Val_SInt_WideStr:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
    end;
  992. {$ifndef CPU64}
  { Compiler helper for Val(WideString, QWord).
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0. }
  Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
    Var
      Narrow : ShortString;
    begin
      fpc_Val_qword_WideStr:=0;
      if length(S)>255 then
        begin
          code:=256;
          exit;
        end;
      Narrow:=S;
      Val(Narrow,fpc_Val_qword_WideStr,Code);
    end;
  { Compiler helper for Val(WideString, Int64).
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0. }
  Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
    Var
      Narrow : ShortString;
    begin
      fpc_Val_int64_WideStr:=0;
      if length(S)>255 then
        begin
          code:=256;
          exit;
        end;
      Narrow:=S;
      Val(Narrow,fpc_Val_int64_WideStr,Code);
    end;
  1019. {$endif CPU64}
  1020. {$ifndef FPUNONE}
  { Compiler helper for Str(float, WideString): formats d into a ShortString
    with str_real and assigns that to s. rt is cast to treal_type. }
  procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
    var
      Formatted : shortstring;
    begin
      str_real(len,fr,d,treal_type(rt),Formatted);
      s:=Formatted;
    end;
  1028. {$endif}
  { Compiler helper for Str(enumeration, WideString): lets fpc_shortstr_enum
    produce the text and assigns it to s. }
  procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
    var
      Formatted : shortstring;
    begin
      fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
      s:=Formatted;
    end;
  { Compiler helper for Str(boolean, WideString): lets fpc_shortstr_bool
    produce the text and assigns it to s. }
  procedure fpc_widestr_bool(b : boolean;len:sizeint;out s:widestring);compilerproc;
    var
      Formatted : shortstring;
    begin
      fpc_shortstr_bool(b,len,Formatted);
      s:=Formatted;
    end;
  1041. {$ifdef FPC_HAS_STR_CURRENCY}
  { Compiler helper for Str(Currency, WideString): formats c with width len
    and fr decimals into a ShortString and assigns that to s. }
  procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
    var
      Formatted : shortstring;
    begin
      str(c:len:fr,Formatted);
      s:=Formatted;
    end;
  1049. {$endif FPC_HAS_STR_CURRENCY}
  { Compiler helper for Str(signed integer, WideString): formats v with
    field width Len into a ShortString and assigns that to S. }
  Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  { Compiler helper for Str(unsigned integer, WideString): formats v with
    field width Len into a ShortString and assigns that to S. }
  Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  1064. {$ifndef CPU64}
  { Compiler helper for Str(Int64, WideString): formats v with field width
    Len into a ShortString and assigns that to S. }
  Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  { Compiler helper for Str(QWord, WideString): formats v with field width
    Len into a ShortString and assigns that to S. }
  Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  1079. {$endif CPU64}
  { Converts the UTF-16 code unit, or surrogate pair, found at S[index] to
    UTF-32.

    len receives the number of UTF-16 code units consumed: 2 for a high
    surrogate that is directly followed by a low surrogate, 1 otherwise.
    Code units outside the surrogate range, as well as surrogates that are
    not part of a valid pair, are returned unchanged. }
  function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
    var
      lead, trail: widechar;
    begin
      lead:=S[index];
      { default outcome: one code unit, passed through as is }
      result:=UCS4Char(lead);
      len:=1;
      { only a high surrogate that still has a successor can start a pair }
      if (lead>=#$d800) and
         (lead<=#$dbff) and
         (index<length(S)) then
        begin
          trail:=S[index+1];
          if (trail>=#$dc00) and
             (trail<=#$dfff) then
            begin
              result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
              len:=2;
            end;
        end;
    end;
  { Convenience form for zero-terminated input: measures Source up to its
    first zero code unit and forwards to the four-argument UnicodeToUtf8.
    Returns 0 when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=0;
      if assigned(Source) then
        Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
    end;
  { Converts UTF-16 text to UTF-8.

    Dest          output buffer; when nil nothing is stored and only the
                  required size is computed
    MaxDestBytes  capacity of Dest in bytes, including the terminating zero
    Source        UTF-16 input; when nil the result is 0
    SourceChars   number of UTF-16 code units to read from Source

    Returns the number of bytes produced plus one for the terminating zero.

    Conversion stops as soon as the next character no longer fits into Dest.
    A high surrogate without a following low surrogate, and a low surrogate
    on its own, produce no output.

    Changes compared to the previous version:
    - with MaxDestBytes=0 nothing is written at all; previously the
      terminating zero was stored in Dest[0] of a zero-sized buffer
    - surrogate pairs are combined directly instead of going through a
      temporary WideString and utf16toutf32

    NOTE(review): when the output fills Dest completely, the terminating zero
    replaces the last byte written, which may cut a multi-byte sequence. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
    var
      i,j : SizeUInt;
      w : word;
      lw : longword;
    begin
      result:=0;
      if source=nil then
        exit;
      i:=0;
      j:=0;
      if assigned(Dest) then
        begin
          while (i<SourceChars) and (j<MaxDestBytes) do
            begin
              w:=word(Source[i]);
              case w of
                0..$7f:
                  begin
                    Dest[j]:=char(w);
                    inc(j);
                  end;
                $80..$7ff:
                  begin
                    if j+1>=MaxDestBytes then
                      break;
                    Dest[j]:=char($c0 or (w shr 6));
                    Dest[j+1]:=char($80 or (w and $3f));
                    inc(j,2);
                  end;
                $800..$d7ff,$e000..$ffff:
                  begin
                    if j+2>=MaxDestBytes then
                      break;
                    Dest[j]:=char($e0 or (w shr 12));
                    Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                    Dest[j+2]:=char($80 or (w and $3f));
                    inc(j,3);
                  end;
                $d800..$dbff:
                  {High Surrogates}
                  begin
                    if j+3>=MaxDestBytes then
                      break;
                    if (i<sourcechars-1) and
                       (word(Source[i+1]) >= $dc00) and
                       (word(Source[i+1]) <= $dfff) then
                      begin
                        { both halves were range-checked above, so the pair
                          can be combined directly }
                        lw:=((longword(w)-$d800) shl 10) + (longword(word(Source[i+1]))-$dc00) + $10000;
                        Dest[j]:=char($f0 or (lw shr 18));
                        Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                        Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                        Dest[j+3]:=char($80 or (lw and $3f));
                        inc(j,4);
                        { skip the low surrogate as well }
                        inc(i);
                      end;
                  end;
              end;
              inc(i);
            end;
          { MaxDestBytes-1 would wrap around for an empty buffer, so only
            terminate when there is room for at least one byte }
          if MaxDestBytes>0 then
            begin
              if j>MaxDestBytes-1 then
                j:=MaxDestBytes-1;
              Dest[j]:=#0;
            end;
        end
      else
        begin
          { size calculation only }
          while i<SourceChars do
            begin
              case word(Source[i]) of
                $0..$7f:
                  inc(j);
                $80..$7ff:
                  inc(j,2);
                $800..$d7ff,$e000..$ffff:
                  inc(j,3);
                $d800..$dbff:
                  begin
                    if (i<sourcechars-1) and
                       (word(Source[i+1]) >= $dc00) and
                       (word(Source[i+1]) <= $dfff) then
                      begin
                        inc(j,4);
                        inc(i);
                      end;
                  end;
              end;
              inc(i);
            end;
        end;
      result:=j+1;
    end;
  { Convenience form for zero-terminated input: measures Source with strlen
    and forwards to the four-argument Utf8ToUnicode. Returns 0 when Source
    is nil. }
  function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=0;
      if assigned(Source) then
        Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
    end;
  { Converts UTF-8 text to UTF-16.

    Dest          output buffer; when nil nothing is stored and only the
                  number of UTF-16 code units is computed
    MaxDestChars  capacity of Dest in UTF-16 code units
    Source        UTF-8 input; when nil the result is 0
    SourceBytes   number of bytes to read from Source

    Returns the number of UTF-16 code units produced plus one. No
    terminating zero is stored in Dest by this routine.

    Malformed input is replaced by '?' (UNICODE_INVALID): stray continuation
    bytes, overlong 2/3/4 byte forms, encoded surrogates, $FFFE/$FFFF,
    values above $10FFFF and 5..7 byte forms. When a sequence is cut short by
    a non-continuation byte, the bytes seen so far are decoded as a shorter
    sequence. A 4 byte character that no longer fits into Dest as a
    surrogate pair is skipped.

    Changes compared to the previous version:
    - a sequence announcing more bytes than are left in Source is now
      rejected before any byte past Source[SourceBytes-1] is inspected; the
      old bounds test was off by one and let the look-ahead read
      Source[SourceBytes]
    - the decoder, previously duplicated for the storing and the counting
      loop, is a single nested helper
    - the LF to CRLF expansion, disabled by an "and FALSE" condition, and the
      PreChar bookkeeping that only served it are gone }
  function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
    const
      UNICODE_INVALID=63;
    var
      InputUTF8: SizeUInt;      { read offset into Source }
      IBYTE: BYTE;              { byte at Source[InputUTF8] }
      OutputUnicode: SizeUInt;  { UTF-16 code units produced so far }
      CharLen: SizeUInt;        { bytes consumed by the current sequence }
      UC: SizeUInt;             { decoded value of the current sequence }

    { Decodes the sequence whose lead byte IBYTE (bit 7 set) is located at
      Source[InputUTF8]. Sets CharLen to the number of bytes to consume and
      UC to the decoded value; a UC of $10000 or above means a valid 4 byte
      character that needs a surrogate pair. }
    procedure DecodeMultiByte;
      var
        TempBYTE: BYTE;
        LookAhead: SizeUInt;
      begin
        { the number of leading 1 bits is the announced sequence length }
        TempBYTE:=IBYTE;
        CharLen:=0;
        while (TempBYTE and $80)<>0 do
          begin
            TempBYTE:=(TempBYTE shl 1) and $FE;
            inc(CharLen);
          end;
        { not enough bytes left for the announced length: fall back to a
          single byte. InputUTF8<SourceBytes holds here, so the subtraction
          cannot wrap. }
        if CharLen>SourceBytes-InputUTF8 then
          CharLen:=1;
        { every following byte must match the 10xxxxxx pattern }
        for LookAhead:=1 to CharLen-1 do
          if (byte(Source[InputUTF8+LookAhead]) and $C0)<>$80 then
            begin
              { invalid sequence: keep only the bytes that were fine }
              CharLen:=LookAhead;
              break;
            end;
        UC:=$FFFF;
        case CharLen of
          1:
            { lone lead or continuation byte }
            UC:=UNICODE_INVALID;
          2:
            begin
              UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
              UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
              { overlong encoding }
              if UC<=$7F then
                UC:=UNICODE_INVALID;
            end;
          3:
            begin
              UC:=(byte(Source[InputUTF8]) and $0F) shl 12;
              UC:=UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
              UC:=UC or ((byte(Source[InputUTF8+2]) and $3F));
              { overlong, $FFFE/$FFFF or an encoded surrogate }
              if (UC<=$7FF) or (UC>=$FFFE) or ((UC>=$D800) and (UC<=$DFFF)) then
                UC:=UNICODE_INVALID;
            end;
          4:
            begin
              UC:=(byte(Source[InputUTF8]) and $07) shl 18;
              UC:=UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
              UC:=UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
              UC:=UC or ((byte(Source[InputUTF8+3]) and $3F));
              { overlong or beyond the Unicode range }
              if (UC<$10000) or (UC>$10FFFF) then
                UC:=UNICODE_INVALID;
            end;
          5,6,7:
            { not convertible to UTF-16 }
            UC:=UNICODE_INVALID;
        end;
      end;

    begin
      if not assigned(Source) then
        begin
          result:=0;
          exit;
        end;
      InputUTF8:=0;
      OutputUnicode:=0;
      if Assigned(Dest) then
        begin
          while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
            begin
              IBYTE:=byte(Source[InputUTF8]);
              if (IBYTE and $80)=0 then
                begin
                  { US-ASCII maps one to one }
                  Dest[OutputUnicode]:=WideChar(IBYTE);
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  DecodeMultiByte;
                  if UC>=$10000 then
                    begin
                      dec(UC,$10000);
                      { only store the pair if there is room for both halves }
                      if OutputUnicode<MaxDestChars-1 then
                        begin
                          Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                          inc(OutputUnicode);
                          UC:=(UC and $3ff) + $DC00;
                        end
                      else
                        begin
                          { consume the input, but don't store anything }
                          InputUTF8:=InputUTF8+CharLen;
                          CharLen:=0;
                        end;
                    end;
                  if CharLen>0 then
                    begin
                      Dest[OutputUnicode]:=WideChar(UC);
                      inc(OutputUnicode);
                    end;
                  InputUTF8:=InputUTF8+CharLen;
                end;
            end;
        end
      else
        begin
          { size calculation only }
          while InputUTF8<SourceBytes do
            begin
              IBYTE:=byte(Source[InputUTF8]);
              if (IBYTE and $80)=0 then
                begin
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  DecodeMultiByte;
                  { a surrogate pair takes one extra code unit }
                  if UC>=$10000 then
                    inc(OutputUnicode);
                  inc(OutputUnicode);
                  InputUTF8:=InputUTF8+CharLen;
                end;
            end;
        end;
      Result:=OutputUnicode+1;
    end;
  { Returns s converted to UTF-8. An empty input, or a conversion that
    reports 0, yields an empty string. }
  function UTF8Encode(const s : WideString) : UTF8String;
    var
      Needed : SizeInt;
      Buf : UTF8String;
    begin
      result:='';
      if s<>'' then
        begin
          { reserve three bytes for every UTF-16 code unit of the input }
          SetLength(Buf,length(s)*3);
          { the extra byte passed here leaves room for the terminating zero }
          Needed:=UnicodeToUtf8(pchar(Buf),length(Buf)+1,PWideChar(s),length(s));
          if Needed>0 then
            begin
              { the reported size includes the terminating zero }
              SetLength(Buf,Needed-1);
              result:=Buf;
            end;
        end;
    end;
  1513. const
  1514. SNoWidestrings = 'This binary has no widestrings support compiled in.';
  1515. SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';
  { Reports that no widestring support is available: prints two explanatory
    lines to StdErr when console I/O is compiled in and the program is a
    console application, then raises run-time error 233 through
    HandleErrorFrame. }
  procedure unimplementedwidestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      if IsConsole then
        begin
          Writeln(StdErr,SNoWidestrings);
          Writeln(StdErr,SRecompileWithWidestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorFrame(233,get_frame);
    end;
  { The three routines below are placeholders: each one only calls
    unimplementedwidestring and never assigns a result, so warnings are
    switched off around them. }
  {$warnings off}

  { Placeholder for case conversion of a WideString. }
  function GenericWideCase(const s : WideString) : WideString;
    begin
      unimplementedwidestring;
    end;

  { Placeholder for comparing two WideStrings. }
  function CompareWideString(const s1, s2 : WideString) : PtrInt;
    begin
      unimplementedwidestring;
    end;

  { Placeholder for the text comparison of two WideStrings. }
  function CompareTextWideString(const s1, s2 : WideString): PtrInt;
    begin
      unimplementedwidestring;
    end;

  {$warnings on}
  1541. function DefaultCharLengthPChar(const Str: PChar): PtrInt;forward;
  1542. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;forward;
  { Resets the global widestringmanager record and fills in the default
    handlers. The ansi/wide move and case conversion handlers are only
    installed when HAS_WIDESTRINGMANAGER is not defined. }
  procedure initwidestringmanager;
    begin
      { start with every hook cleared }
      fillchar(widestringmanager,sizeof(widestringmanager),0);
  {$ifndef HAS_WIDESTRINGMANAGER}
      { conversion between 8-bit and wide strings }
      widestringmanager.Wide2AnsiMoveProc:=@DefaultWide2AnsiMove;
      widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
      { upper and lower casing share the same placeholder }
      widestringmanager.UpperWideStringProc:=@GenericWideCase;
      widestringmanager.LowerWideStringProc:=@GenericWideCase;
  {$endif HAS_WIDESTRINGMANAGER}
      { comparison placeholders }
      widestringmanager.CompareWideStringProc:=@CompareWideString;
      widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
      { character and code point length helpers }
      widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
      widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
    end;