wstrings.inc 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for WideStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  {
    This file contains the implementation of the WideString type,
    and all things that are needed for it.
    WideString is defined as a 'silent' pwidechar :
    a pwidechar that points to :

    @-4  : DWord for size; size=number of bytes, not the number of chars. Divide or multiply
           with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
           Windows COM BSTR.
    @    : String + Terminating #0;
    Pwidechar(Widestring) is a valid typecast.
    So WS[i] is converted to the address @WS+(i-1)*sizeof(WideChar).

    Note: older descriptions of this layout mention a reference count at @-8
    (with -1 marking constants that can't be disposed of). The TWideRec record
    declared below has no such field: the routines in this file copy the
    string when a reference is added and free it when a reference is dropped.
  }
  { Header record placed in front of the character data of a heap-allocated
    WideString, plus the two offsets derived from it. }
  Type
    PWideRec = ^TWideRec;
    TWideRec = Packed Record
      Len   : DWord;      { payload size in bytes, not in characters }
      First : WideChar;   { first character slot of the string data }
    end;

  Const
    { Size of the length field plus one WideChar. }
    WideRecLen   = SizeOf(TWideRec);
    { Distance from the start of the record to the first character. }
    WideFirstOff = WideRecLen-SizeOf(WideChar);
  {
    The default WideChar <-> Char conversion only keeps characters whose
    code is below 256; every other wide character is translated to '?'.
    These routines can be overridden for the current locale.
  }
  { Fallback WideChar -> Char conversion.
    dest is resized to len characters. Every source character with a code
    below 256 is stored as the char with that same code; anything else is
    stored as '?'. The cp argument is not used by this routine. }
  procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;cp : TSystemCodePage;len:SizeInt);
  var
    remaining : SizeInt;
    outp      : PChar;
    code      : word;
  begin
    setlength(dest,len);
    outp:=PChar(Pointer(dest));
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  { Fallback Char -> WideChar conversion.
    dest is resized to len characters and every source byte is widened to
    the WideChar with the same code. The cp argument is not used here. }
  procedure DefaultAnsi2WideMove(source:pchar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
  var
    idx : SizeInt;
  begin
    setlength(dest,len);
    idx:=0;
    while idx<len do
      begin
        inc(idx);
        dest[idx]:=widechar(byte(source^));
        inc(source);
      end;
  end;
  69. {****************************************************************************
  70. Internal functions, not in interface.
  71. ****************************************************************************}
  { Reports run-time error 204 for the calling frame. Used by the allocation
    routines in this file when memory for a WideString cannot be obtained. }
  procedure WideStringError;
  const
    WideStrErrorCode = 204;
  begin
    HandleErrorFrame(WideStrErrorCode,get_frame);
  end;
  76. {$ifdef WideStrDebug}
  { Debug helper: writes the header of the WideString whose character data
    starts at S to standard output.
    S is a pointer to the first character (nil for an empty string).
    TWideRec only declares Len and First, so only the byte length is
    printed; the former "Ref" output referenced a field that does not
    exist and prevented compilation with WideStrDebug defined. }
  Procedure DumpWideRec(S : Pointer);
  begin
    If S=Nil then
      Writeln ('String is nil')
    Else
      { step back from the character data to the start of the header }
      Writeln ('(Len:',PWideRec(S-WideFirstOff)^.len,')');
  end;
  90. {$endif}
  Function NewWideString(Len : SizeInt) : Pointer;
  {
    Allocates storage for a WideString of Len characters and returns a
    pointer to its first character.
    On Windows with winwidestringalloc set, the block comes from
    SysAllocStringLen. Otherwise it is taken from the heap, the byte length
    is stored in the header and the first character is set to #0.
    Calls WideStringError when no memory could be obtained.
  }
  Var
    Mem : Pointer;
  begin
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        Mem:=SysAllocStringLen(nil,Len);
        if Mem=nil then
          WideStringError;
      end
    else
  {$endif MSWINDOWS}
      begin
        GetMem(Mem,Len*sizeof(WideChar)+WideRecLen);
        If Mem=Nil then
          WideStringError
        else
          begin
            PWideRec(Mem)^.Len:=Len*2;       { length in bytes }
            PWideRec(Mem)^.First:=#0;        { terminator in the first slot }
            inc(Mem,WideFirstOff);           { skip header: points to string now }
          end;
      end;
    NewWideString:=Mem;
  end;
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
  {
    Releases the WideString whose character data S points to and sets S to
    nil. A nil S is left untouched.
    The memory is returned through SysFreeString when winwidestringalloc is
    set on Windows, otherwise through Freemem on the header address.
  }
  Begin
    If S<>Nil then
      begin
  {$ifdef MSWINDOWS}
        if winwidestringalloc then
          SysFreeString(S)
        else
  {$endif MSWINDOWS}
          begin
            { the heap block starts at the header, not at the characters }
            Dec (S,WideFirstOff);
            Freemem(S);
          end;
        S:=Nil;
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
  { Gives S its own copy of the string it points to: a new WideString of
    the same length is allocated, the characters and the terminating #0 are
    copied, and S is redirected to the copy. A nil S is left untouched. }
  Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
  var
    Dup     : pointer;
    CharCnt : SizeInt;
  Begin
    If S<>Nil then
      begin
        CharCnt:=length(WideString(S));
        Dup:=NewWidestring(CharCnt);
        move(S^,Dup^,(CharCnt+1)*sizeof(widechar)); // terminating #0 too
        S:=Dup;
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
  procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts the WideString S2 to the ShortString res.
    At most high(res) characters of S2 are handed to the installed
    Wide2AnsiMoveProc, using DefaultSystemCodePage. An empty S2 gives ''.
  }
  Var
    Count  : SizeInt;
    Narrow : ansistring;
  begin
    res:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    if Count>high(res) then
      Count:=high(res);
    widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),Narrow,DefaultSystemCodePage,Count);
    res:=Narrow;
  end;
  Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
  {
    Converts the ShortString S2 to a WideString through the installed
    Ansi2WideMoveProc, using DefaultSystemCodePage. An empty S2 gives ''.
  }
  Var
    Count : SizeInt;
  begin
    result:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Count);
  end;
  Function fpc_WideStr_To_AnsiStr (const S2 : WideString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts the WideString S2 to an AnsiString through the installed
    Wide2AnsiMoveProc.
    With FPC_HAS_CPSTRING the target code page is the cp parameter;
    without it DefaultSystemCodePage is used. CP_ACP is replaced by
    DefaultSystemCodePage before the conversion. An empty S2 gives ''.
  }
  Var
    Count : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    if cp=CP_ACP then
      cp:=DefaultSystemCodePage;
    widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,cp,Count);
  end;
  Function fpc_AnsiStr_To_WideStr(Const S2 : RawByteString): WideString; compilerproc;
  {
    Converts the AnsiString S2 to a WideString through the installed
    Ansi2WideMoveProc. The source code page is StringCodePage(S2), with
    CP_ACP replaced by DefaultSystemCodePage. An empty S2 gives ''.
  }
  Var
    Count   : SizeInt;
    SrcPage : TSystemCodePage;
  begin
    result:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    SrcPage:=StringCodePage(S2);
    if SrcPage=CP_ACP then
      SrcPage:=DefaultSystemCodePage;
    widestringmanager.Ansi2WideMoveProc(PChar(S2),SrcPage,result,Count);
  end;
  { Builds a WideString from the zero-terminated wide character sequence at
    p. The length is the index of the first zero word found by IndexWord.
    A nil p gives ''. }
  Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
  var
    Count : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        Count:=IndexWord(p^,-1,0);
        Setlength(result,Count);   // zero-terminates
        if Count>0 then
          Move(p^,PWideChar(Pointer(result))^,Count*sizeof(WideChar));
      end;
  end;
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2) by copying the characters.
    Nothing happens when both already point to the same data. A nil S2
    releases S1. Otherwise S1 is resized to the length of S2 and the
    characters plus terminating #0 are copied; on Windows with
    winwidestringalloc set this is done by SysReAllocStringLen, and
    WideStringError is called when that fails.
  }
  begin
    if S1=S2 then
      exit;
    if S2=nil then
      begin
        { Free S1 }
        fpc_widestr_decr_ref (S1);
        S1:=nil;
        exit;
      end;
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        if SysReAllocStringLen(S1, S2, Length(WideString(S2))) = 0 then
          WideStringError;
      end
    else
  {$endif MSWINDOWS}
      begin
        SetLength(WideString(S1),length(WideString(S2)));
        move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar));
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
  { Stores S1+S2 in DestS.
    When one operand is empty the other is simply assigned. Otherwise the
    routine distinguishes whether DestS shares its data pointer with S1,
    with S2, or with neither, so that operands that alias the destination
    are read correctly while DestS is being resized. }
  procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc;
  Var
    Len1,Len2  : SizeInt;
    SelfAppend : boolean;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS:=DestS+S2; remember whether S2 is DestS as well, because
          the pointers may change once DestS is resized }
        SelfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        if SelfAppend then
          Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(WideChar))^,(Len2)*sizeof(WideChar))
        else
          Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(WideChar))^,(Len2+1)*sizeof(WideChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS:=S1+DestS; shift the old contents up, then put S1 in front }
        SetLength(DestS,Len1+Len2);
        Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(WideChar))^,(Len2+1)*sizeof(WideChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
      end
    else
      begin
        { no aliasing: start from an empty destination }
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
        Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(WideChar))^,(Len2+1)*sizeof(WideChar));
      end;
  end;
  { Stores the concatenation of all elements of sarr, in order, in DestS.
    The result is assembled in a temporary string, so DestS may also occur
    as an element of sarr.
    high(sarr)=0 means the array holds exactly one element; the result is
    then that element (previously '' was returned for this case). }
  procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    DestTmp     : Widestring;
  begin
    if high(sarr)=0 then
      begin
        DestS:=sarr[0];
        exit;
      end;
    { First calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestTmp,NewLen);
    pc:=pwidechar(DestTmp);
    for i:=low(sarr) to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        { empty elements are nil pointers and contribute nothing }
        if assigned(p) then
          begin
            Size:=length(widestring(p));
            { copy the terminating #0 as well; the next element overwrites it }
            Move(p^,pc^,(Size+1)*sizeof(WideChar));
            inc(pc,size*sizeof(WideChar));
          end;
      end;
    DestS:=DestTmp;
  end;
  Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
  {
    Returns a WideString of length 1 whose only character is c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  {
    Returns a WideString of length 1 whose only character is the WideChar c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  Function fpc_WChar_To_AnsiStr(const c : WideChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts the single WideChar c to an AnsiString through the installed
    Wide2AnsiMoveProc. The code page passed on is cp when FPC_HAS_CPSTRING
    is defined and TSystemCodePage(0) otherwise.
  }
  var
    TargetPage : TSystemCodePage;
  begin
  {$ifdef FPC_HAS_CPSTRING}
    TargetPage:=cp;
  {$else}
    TargetPage:=TSystemCodePage(0);
  {$endif FPC_HAS_CPSTRING}
    widestringmanager.Wide2AnsiMoveProc(@c,result,TargetPage,1);
  end;
  Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  {
    Returns a WideString of length 1 whose only character is the WideChar c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  { Converts the zero-terminated char sequence at p to a WideString through
    the installed Ansi2WideMoveProc, using DefaultSystemCodePage.
    A nil p or a sequence that starts with #0 gives ''. }
  Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
  Var
    Count : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        Count:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2WideMoveProc(P,DefaultSystemCodePage,result,Count);
      end
    else
      result:='';
  end;
  { Converts a char array to a WideString through the installed
    Ansi2WideMoveProc, using DefaultSystemCodePage.
    With zerobased set, the text ends at the first #0 (the whole array is
    used when there is none, and '' is returned when arr[0] is #0).
    Without it the whole array is converted. }
  Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
  var
    Count : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            result:='';
            exit;
          end;
        Count:=IndexChar(arr,high(arr)+1,#0);
        if Count=-1 then
          Count:=high(arr)+1;
      end;
    widestringmanager.Ansi2WideMoveProc(pchar(@arr),DefaultSystemCodePage,result,Count);
  end;
  { Converts src through the installed Wide2AnsiMoveProc (using
    DefaultSystemCodePage) and copies the result into res. The copy is
    truncated to length(res); the unused tail of res is filled with zero
    bytes. }
  procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc;
  var
    count  : SizeInt;
    narrow : ansistring;
  begin
    count:=length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if count>0 then
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),narrow,DefaultSystemCodePage,count);
    count:=length(narrow);
    if count>length(res) then
      count:=length(res);
  {$push}
  {$r-}
    move(narrow[1],res[0],count);
    fillchar(res[count],length(res)-count,0);
  {$pop}
  end;
  { Copies the characters of src into res, truncated to length(res), and
    fills the unused tail of res with zero bytes. }
  procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc;
  var
    count : SizeInt;
  begin
    count:=length(src);
    if count>length(res) then
      count:=length(res);
  {$push}
  {$r-}
    { make sure we don't try to access element 1 of the widestring if it's nil }
    if count>0 then
      move(src[1],res[0],count*SizeOf(WideChar));
    fillchar(res[count],(length(res)-count)*SizeOf(WideChar),0);
  {$pop}
  end;
  Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
  {
    Compares 2 WideStrings;
    The result is
     <0 if S1<S2
     0 if S1=S2
     >0 if S1>S2
    The common prefix is compared with CompareWord; when it is equal the
    difference of the lengths decides.
  }
  Var
    Len1,Len2,Common : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1>Len2 then
      Common:=Len2
    else
      Common:=Len1;
    result:=CompareWord(S1[1],S2[1],Common);
    if result=0 then
      result:=Len1-Len2;
  end;
  Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
  {
    Compares 2 WideStrings for equality only;
    The result is
     0 if S1=S2
     <>0 if S1<>S2
    Strings of different length give -1 without looking at the characters.
  }
  begin
    if pointer(S1)=pointer(S2) then
      result:=0
    else if Length(S1)<>Length(S2) then
      result:=-1
    else
      result:=CompareWord(S1[1],S2[1],Length(S1));
  end;
  478. {$ifdef VER2_4}
  // obsolete but needed for bootstrapping with 2.4
  { Raises run-time error 201 for the calling frame when p is nil. }
  Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc;
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  { Raises run-time error 201 for the calling frame unless
    1 <= index <= len div 2 (len is halved before the comparison). }
  Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  begin
    if (Index<1) or (index>len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  490. {$else VER2_4}
  { Raises run-time error 201 for the calling frame when p is nil or when
    index is not within 1..(stored byte length div 2). p points to the
    first character; the length is read from the header in front of it. }
  Procedure fpc_WideStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  begin
    if (p=nil) or (Index<1) or (index>PWideRec(p-WideFirstOff)^.len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  496. {$endif VER2_4}
  Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l characters.
    l<=0 releases the string and leaves S nil. A nil S gets a freshly
    allocated string. An existing heap string is grown in place with
    reallocmem when its block is too small; an existing string allocated
    through the Windows API is replaced by a new one into which the old
    contents are copied. The result is always #0 terminated at position l.
  }
  Var
    Fresh     : Pointer;
    CopyChars : SizeInt;
  begin
    if l<=0 then
      begin
        { Length=0 }
        if Pointer(S)<>nil then
          fpc_widestr_decr_ref (Pointer(S));
        Pointer(S):=Nil;
        exit;
      end;
    if Pointer(S)=nil then
      begin
        { Need a complete new string...}
        Pointer(s):=NewWideString(l);
      end
    { windows doesn't support reallocing widestrings, this code
      is anyways subject to be removed because widestrings shouldn't be
      ref. counted anymore (FK) }
    else
      if
  {$ifdef MSWINDOWS}
        not winwidestringalloc and
  {$endif MSWINDOWS}
        True
      then
        begin
          { work on the header address while (possibly) growing the block }
          Dec(Pointer(S),WideFirstOff);
          if SizeUInt(L*sizeof(WideChar)+WideRecLen)>MemSize(Pointer(S)) then
            reallocmem(pointer(S), L*sizeof(WideChar)+WideRecLen);
          Inc(Pointer(S), WideFirstOff);
        end
      else
        begin
          { Reallocation is needed... }
          Fresh:=Pointer(NewWideString(L));
          if Length(S)>0 then
            begin
              { copy the old text including its terminating null,
                but never more than l characters }
              CopyChars:=succ(length(s));
              if l<CopyChars then
                CopyChars:=l;
              Move(Pointer(S)^,Fresh^,CopyChars*Sizeof(WideChar));
            end;
          fpc_widestr_decr_ref(Pointer(S));
          Pointer(S):=Fresh;
        end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
  {$ifdef MSWINDOWS}
    if not winwidestringalloc then
  {$endif MSWINDOWS}
      PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar);
  end;
  560. {*****************************************************************************
  561. Public functions, In interface.
  562. *****************************************************************************}
  { Returns S unchanged; this routine performs no copy. }
  Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc;
  begin
    fpc_widestr_Unique:=S;
  end;
  { Returns up to Size characters of S starting at the 1-based position
    Index. An Index below 1 is treated as 1, and Size is clipped to the
    characters available from Index on. The result is '' when nothing
    remains to copy. }
  Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc;
  var
    Fresh  : Pointer;
    SrcLen : SizeInt;
  begin
    Fresh:=Nil;
    SrcLen:=Length(S);
    { switch to a 0-based start position }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Check Size. Accounts for Zero-length S, the double check is needed because
      Size can be maxint and will get <0 when adding index }
    if (Size>SrcLen) or
       (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        Fresh:=Pointer(NewWideString (Size));
        if Fresh<>Nil then
          begin
            Move (PWideChar(S)[Index],Fresh^,Size*sizeof(WideChar));
            PWideRec(Fresh-WideFirstOff)^.Len:=Size*sizeof(WideChar);
            PWideChar(Fresh+Size*sizeof(WideChar))^:=#0;
          end;
      end;
    { drop whatever the result variable held before installing the copy }
    fpc_widestr_decr_ref(Pointer(result));
    Pointer(result):=Fresh;
  end;
  { Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when Substr is empty or does not occur. }
  Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt;
  var
    Start,LastStart,SubLen : SizeInt;
    Cur : pwidechar;
  begin
    result:=0;
    SubLen:=Length(SubStr);
    if SubLen<=0 then
      exit;
    { last 0-based offset at which Substr still fits into Source }
    LastStart:=Length(Source)-SubLen;
    Cur:=@Source[1];
    for Start:=0 to LastStart do
      begin
        { cheap first-character test before the full comparison }
        if (SubStr[1]=Cur^) and
           (CompareWord(SubStr[1],Cur^,SubLen)=0) then
          begin
            result:=Start+1;
            exit;
          end;
        inc(Cur);
      end;
  end;
  { Faster version for a widechar alone.
    Returns the 1-based position of the first c in s, or 0 if s does not
    contain c. }
  Function Pos (c : WideChar; Const s : WideString) : SizeInt;
  var
    Offset,Count : SizeInt;
    Chars : pwidechar;
  begin
    Chars:=@s[1];
    Count:=length(s);
    Offset:=0;
    while Offset<Count do
      begin
        if Chars[Offset]=c then
          begin
            result:=Offset+1;
            exit;
          end;
        inc(Offset);
      end;
    result:=0;
  end;
  { Converts s to a WideString and searches it for the wide character c. }
  Function Pos (c : WideChar; Const s : RawByteString) : SizeInt;
  begin
    Pos:=Pos(c,WideString(s));
  end;
  { Converts the AnsiString c to a WideString and searches s for it. }
  Function Pos (c : RawByteString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(WideString(c),s);
  end;
  { Converts the ShortString c to a WideString and searches s for it. }
  Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(WideString(c),s);
  end;
  { Converts s to a WideString and searches it for the WideString c. }
  Function Pos (c : WideString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Pos:=Pos(c,WideString(s));
  end;
  { Faster version for a char alone. Must be implemented because   }
  { pos(c: char; const s: shortstring) also exists, so otherwise   }
  { using pos(char,pchar) will always call the shortstring version }
  { (exact match for first argument), also with $h+ (JM)           }
  { Returns the 1-based position of the first occurrence of c      }
  { (widened to a WideChar) in s, or 0 if there is none.           }
  Function Pos (c : Char; Const s : WideString) : SizeInt;
  var
    Offset,Count : SizeInt;
    Wanted : widechar;
    Chars  : pwidechar;
  begin
    Wanted:=c;
    Chars:=@s[1];
    Count:=length(s);
    Offset:=0;
    while Offset<Count do
      begin
        if Chars[Offset]=Wanted then
          begin
            result:=Offset+1;
            exit;
          end;
        inc(Offset);
      end;
    result:=0;
  end;
  { Removes Size characters from S starting at the 1-based position Index.
    Nothing happens when Index is outside 1..Length(S) or Size<=0. A Size
    that reaches past the end of S removes everything from Index on. }
  Procedure Delete (Var S : WideString; Index,Size: SizeInt);
  Var
    Total : SizeInt;
  begin
    Total:=Length(S);
    if (Index<=0) or (Index>Total) or (Size<=0) then
      exit;
    UniqueString (S);
    { (Size+Index) will overflow if Size=MaxInt. }
    if Size>Total-Index then
      { cut the tail: only the length changes, nothing has to move }
      Size:=Total-Index+1
    else
      begin
        { close the gap; the +1 moves the terminating #0 along }
        Dec(Index);
        Move(PWideChar(S)[Index+Size],PWideChar(S)[Index],(Total-Index-Size+1)*sizeof(WideChar));
      end;
    Setlength(S,Total-Size);
  end;
  { Inserts Source into S in front of the character at the 1-based position
    Index. An Index below 1 inserts at the start, an Index beyond the end
    appends. An empty Source leaves S unchanged.
    The result is built in a temporary string that SetLength allocates
    once; the earlier explicit NewWideString call in front of SetLength
    allocated the same buffer a second time and has been removed. }
  Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt);
  var
    Temp   : WideString;
    LS,Ins : SizeInt;
  begin
    Ins:=Length(Source);
    If Ins=0 then
      exit;
    if index <= 0 then
      index := 1;
    LS:=Length(S);
    if index > LS then
      index := LS+1;
    { from here on Index is the number of characters kept in front }
    Dec(Index);
    SetLength(Temp,Ins+LS);
    If Index>0 then
      move (PWideChar(S)^,PWideChar(Temp)^,Index*sizeof(WideChar));
    Move (PWideChar(Source)^,PWideChar(Temp)[Index],Ins*sizeof(WideChar));
    If (LS-Index)>0 then
      Move(PWideChar(S)[Index],PWideChar(Temp)[Ins+Index],(LS-Index)*sizeof(WideChar));
    S:=Temp;
  end;
  { Returns s converted by the UpperWideStringProc of the installed
    widestring manager. }
  function UpCase(const s : WideString) : WideString;
  begin
    UpCase:=widestringmanager.UpperWideStringProc(s);
  end;
  { Sets S to length Len and, when Buf is not nil and Len>0, fills it with
    the first Len wide characters found at Buf. }
  Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    If (Len>0) and (Buf<>Nil) then
      Move (Buf^,S[1],Len*sizeof(WideChar));
  end;
  { Sets S to length Len and, when Buf is not nil and Len>0, converts the
    first Len chars at Buf into S through the installed Ansi2WideMoveProc,
    using DefaultSystemCodePage. }
  Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    If (Len>0) and (Buf<>Nil) then
      widestringmanager.Ansi2WideMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  732. {$ifndef FPUNONE}
  { Val() for a floating point value held in a WideString. The text is
    converted to a short string and parsed by Val. Input longer than 255
    characters is rejected: the result is 0 and Code is set to 256. }
  Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,result,code);
      end
    else
      code:=256;
  end;
  746. {$endif}
  { Val() for an enumeration value held in a WideString. The text is
    converted to a short string and parsed by Val. Input longer than 255
    characters is rejected with Code set to 256.
    The result is now initialised to 0 so it is defined on the rejection
    path too, matching the other Val wrappers in this file.
    NOTE(review): str2ordindex is not used by this routine - confirm that
    parsing without the lookup table is intended. }
  function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
  var
    ss : shortstring;
  begin
    fpc_val_enum_widestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_widestr,code);
      end;
  end;
  { Val() for a Currency value held in a WideString. The text is converted
    to a short string and parsed by Val. Input longer than 255 characters
    is rejected: the result is 0 and Code is set to 256. }
  Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,result,code);
      end
    else
      begin
        result:=0;
        code:=256;
      end;
  end;
  { Val() for an unsigned integer held in a WideString. The text is
    converted to a ShortString and parsed by Val. Input longer than 255
    characters is rejected: the result is 0 and Code is set to 256. }
  Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,result,code);
      end
    else
      code:=256;
  end;
  { Val() for a signed integer held in a WideString. The text is converted
    to a ShortString and handed to int_Val_SInt_ShortStr together with
    DestSize. Input longer than 255 characters is rejected: the result is
    0 and Code is set to 256. }
  Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      code:=256;
  end;
  799. {$ifndef CPU64}
  { Val() for a qword held in a WideString. The text is converted to a
    ShortString and parsed by Val. Input longer than 255 characters is
    rejected: the result is 0 and Code is set to 256. }
  Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,result,Code);
      end
    else
      code:=256;
  end;
  { Val() for an Int64 held in a WideString. The text is converted to a
    ShortString and parsed by Val. Input longer than 255 characters is
    rejected: the result is 0 and Code is set to 256. }
  Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,result,Code);
      end
    else
      code:=256;
  end;
  826. {$endif CPU64}
  827. {$ifndef FPUNONE}
  { Str() for a floating point value with a WideString target: d is
    formatted by str_real into a short string, which is then assigned
    to s. }
  procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Formatted);
    s:=Formatted;
  end;
  835. {$endif}
  { Str() for an enumeration value with a WideString target: the text is
    produced by fpc_shortstr_enum and then assigned to s. }
  procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
    s:=Formatted;
  end;
  { Str() for a boolean with a WideString target: the text is produced by
    fpc_shortstr_bool and then assigned to s. }
  procedure fpc_widestr_bool(b : boolean;len:sizeint;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_bool(b,len,Formatted);
    s:=Formatted;
  end;
  848. {$ifdef FPC_HAS_STR_CURRENCY}
  { Str() for a Currency value with a WideString target: c is formatted
    with width len and fr decimals into a short string, which is then
    assigned to s. }
  procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str(c:len:fr,Formatted);
    s:=Formatted;
  end;
  856. {$endif FPC_HAS_STR_CURRENCY}
  857. Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
  858. Var
  859. SS : ShortString;
  860. begin
  861. Str (v:Len,SS);
  862. S:=SS;
  863. end;
  864. Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
  865. Var
  866. SS : ShortString;
  867. begin
  868. str(v:Len,SS);
  869. S:=SS;
  870. end;
  871. {$ifndef CPU64}
  872. Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
  873. Var
  874. SS : ShortString;
  875. begin
  876. Str (v:Len,SS);
  877. S:=SS;
  878. end;
  879. Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
  880. Var
  881. SS : ShortString;
  882. begin
  883. str(v:Len,SS);
  884. S:=SS;
  885. end;
  886. {$endif CPU64}
  887. { converts an utf-16 code point or surrogate pair to utf-32 }
  888. function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
  889. var
  890. w: widechar;
  891. begin
  892. { UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
  893. { are the same in UTF-32 }
  894. w:=s[index];
  895. if (w<=#$d7ff) or
  896. (w>=#$e000) then
  897. begin
  898. result:=UCS4Char(w);
  899. len:=1;
  900. end
  901. { valid surrogate pair? }
  902. else if (w<=#$dbff) and
  903. { w>=#$d7ff check not needed, checked above }
  904. (index<length(s)) and
  905. (s[index+1]>=#$dc00) and
  906. (s[index+1]<=#$dfff) then
  907. { convert the surrogate pair to UTF-32 }
  908. begin
  909. result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
  910. len:=2;
  911. end
  912. else
  913. { invalid surrogate -> do nothing }
  914. begin
  915. result:=UCS4Char(w);
  916. len:=1;
  917. end;
  918. end;
  919. function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  920. begin
  921. if assigned(Source) then
  922. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  923. else
  924. Result:=0;
  925. end;
  926. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
  927. var
  928. i,j : SizeUInt;
  929. w : word;
  930. lw : longword;
  931. len : longint;
  932. begin
  933. result:=0;
  934. if source=nil then
  935. exit;
  936. i:=0;
  937. j:=0;
  938. if assigned(Dest) then
  939. begin
  940. while (i<SourceChars) and (j<MaxDestBytes) do
  941. begin
  942. w:=word(Source[i]);
  943. case w of
  944. 0..$7f:
  945. begin
  946. Dest[j]:=char(w);
  947. inc(j);
  948. end;
  949. $80..$7ff:
  950. begin
  951. if j+1>=MaxDestBytes then
  952. break;
  953. Dest[j]:=char($c0 or (w shr 6));
  954. Dest[j+1]:=char($80 or (w and $3f));
  955. inc(j,2);
  956. end;
  957. $800..$d7ff,$e000..$ffff:
  958. begin
  959. if j+2>=MaxDestBytes then
  960. break;
  961. Dest[j]:=char($e0 or (w shr 12));
  962. Dest[j+1]:=char($80 or ((w shr 6) and $3f));
  963. Dest[j+2]:=char($80 or (w and $3f));
  964. inc(j,3);
  965. end;
  966. $d800..$dbff:
  967. {High Surrogates}
  968. begin
  969. if j+3>=MaxDestBytes then
  970. break;
  971. if (i<sourcechars-1) and
  972. (word(Source[i+1]) >= $dc00) and
  973. (word(Source[i+1]) <= $dfff) then
  974. begin
  975. lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
  976. Dest[j]:=char($f0 or (lw shr 18));
  977. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  978. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  979. Dest[j+3]:=char($80 or (lw and $3f));
  980. inc(j,4);
  981. inc(i);
  982. end;
  983. end;
  984. end;
  985. inc(i);
  986. end;
  987. if j>SizeUInt(MaxDestBytes-1) then
  988. j:=MaxDestBytes-1;
  989. Dest[j]:=#0;
  990. end
  991. else
  992. begin
  993. while i<SourceChars do
  994. begin
  995. case word(Source[i]) of
  996. $0..$7f:
  997. inc(j);
  998. $80..$7ff:
  999. inc(j,2);
  1000. $800..$d7ff,$e000..$ffff:
  1001. inc(j,3);
  1002. $d800..$dbff:
  1003. begin
  1004. if (i<sourcechars-1) and
  1005. (word(Source[i+1]) >= $dc00) and
  1006. (word(Source[i+1]) <= $dfff) then
  1007. begin
  1008. inc(j,4);
  1009. inc(i);
  1010. end;
  1011. end;
  1012. end;
  1013. inc(i);
  1014. end;
  1015. end;
  1016. result:=j+1;
  1017. end;
  1018. function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1019. begin
  1020. if assigned(Source) then
  1021. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1022. else
  1023. Result:=0;
  1024. end;
  1025. function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1026. const
  1027. UNICODE_INVALID=63;
  1028. var
  1029. InputUTF8: SizeUInt;
  1030. IBYTE: BYTE;
  1031. OutputUnicode: SizeUInt;
  1032. PRECHAR: SizeUInt;
  1033. TempBYTE: BYTE;
  1034. CharLen: SizeUint;
  1035. LookAhead: SizeUInt;
  1036. UC: SizeUInt;
  1037. begin
  1038. if not assigned(Source) then
  1039. begin
  1040. result:=0;
  1041. exit;
  1042. end;
  1043. result:=SizeUInt(-1);
  1044. InputUTF8:=0;
  1045. OutputUnicode:=0;
  1046. PreChar:=0;
  1047. if Assigned(Dest) Then
  1048. begin
  1049. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1050. begin
  1051. IBYTE:=byte(Source[InputUTF8]);
  1052. if (IBYTE and $80) = 0 then
  1053. begin
  1054. //One character US-ASCII, convert it to unicode
  1055. if IBYTE = 10 then
  1056. begin
  1057. If (PreChar<>13) and FALSE then
  1058. begin
  1059. //Expand to crlf, conform UTF-8.
  1060. //This procedure will break the memory alocation by
  1061. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1062. if OutputUnicode+1<MaxDestChars then
  1063. begin
  1064. Dest[OutputUnicode]:=WideChar(13);
  1065. inc(OutputUnicode);
  1066. Dest[OutputUnicode]:=WideChar(10);
  1067. inc(OutputUnicode);
  1068. PreChar:=10;
  1069. end
  1070. else
  1071. begin
  1072. Dest[OutputUnicode]:=WideChar(13);
  1073. inc(OutputUnicode);
  1074. end;
  1075. end
  1076. else
  1077. begin
  1078. Dest[OutputUnicode]:=WideChar(IBYTE);
  1079. inc(OutputUnicode);
  1080. PreChar:=IBYTE;
  1081. end;
  1082. end
  1083. else
  1084. begin
  1085. Dest[OutputUnicode]:=WideChar(IBYTE);
  1086. inc(OutputUnicode);
  1087. PreChar:=IBYTE;
  1088. end;
  1089. inc(InputUTF8);
  1090. end
  1091. else
  1092. begin
  1093. TempByte:=IBYTE;
  1094. CharLen:=0;
  1095. while (TempBYTE and $80)<>0 do
  1096. begin
  1097. TempBYTE:=(TempBYTE shl 1) and $FE;
  1098. inc(CharLen);
  1099. end;
  1100. //Test for the "CharLen" conforms UTF-8 string
  1101. //This means the 10xxxxxx pattern.
  1102. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1103. begin
  1104. //Insuficient chars in string to decode
  1105. //UTF-8 array. Fallback to single char.
  1106. CharLen:= 1;
  1107. end;
  1108. for LookAhead := 1 to CharLen-1 do
  1109. begin
  1110. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1111. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1112. begin
  1113. //Invalid UTF-8 sequence, fallback.
  1114. CharLen:= LookAhead;
  1115. break;
  1116. end;
  1117. end;
  1118. UC:=$FFFF;
  1119. case CharLen of
  1120. 1: begin
  1121. //Not valid UTF-8 sequence
  1122. UC:=UNICODE_INVALID;
  1123. end;
  1124. 2: begin
  1125. //Two bytes UTF, convert it
  1126. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1127. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1128. if UC <= $7F then
  1129. begin
  1130. //Invalid UTF sequence.
  1131. UC:=UNICODE_INVALID;
  1132. end;
  1133. end;
  1134. 3: begin
  1135. //Three bytes, convert it to unicode
  1136. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1137. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1138. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1139. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1140. begin
  1141. //Invalid UTF-8 sequence
  1142. UC:= UNICODE_INVALID;
  1143. End;
  1144. end;
  1145. 4: begin
  1146. //Four bytes, convert it to two unicode characters
  1147. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1148. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1149. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1150. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1151. if (UC < $10000) or (UC > $10FFFF) then
  1152. begin
  1153. UC:= UNICODE_INVALID;
  1154. end
  1155. else
  1156. begin
  1157. { only store pair if room }
  1158. dec(UC,$10000);
  1159. if (OutputUnicode<MaxDestChars-1) then
  1160. begin
  1161. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1162. inc(OutputUnicode);
  1163. UC:=(UC and $3ff) + $DC00;
  1164. end
  1165. else
  1166. begin
  1167. InputUTF8:= InputUTF8 + CharLen;
  1168. { don't store anything }
  1169. CharLen:=0;
  1170. end;
  1171. end;
  1172. end;
  1173. 5,6,7: begin
  1174. //Invalid UTF8 to unicode conversion,
  1175. //mask it as invalid UNICODE too.
  1176. UC:=UNICODE_INVALID;
  1177. end;
  1178. end;
  1179. if CharLen > 0 then
  1180. begin
  1181. PreChar:=UC;
  1182. Dest[OutputUnicode]:=WideChar(UC);
  1183. inc(OutputUnicode);
  1184. end;
  1185. InputUTF8:= InputUTF8 + CharLen;
  1186. end;
  1187. end;
  1188. Result:=OutputUnicode+1;
  1189. end
  1190. else
  1191. begin
  1192. while (InputUTF8<SourceBytes) do
  1193. begin
  1194. IBYTE:=byte(Source[InputUTF8]);
  1195. if (IBYTE and $80) = 0 then
  1196. begin
  1197. //One character US-ASCII, convert it to unicode
  1198. if IBYTE = 10 then
  1199. begin
  1200. if (PreChar<>13) and FALSE then
  1201. begin
  1202. //Expand to crlf, conform UTF-8.
  1203. //This procedure will break the memory alocation by
  1204. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1205. inc(OutputUnicode,2);
  1206. PreChar:=10;
  1207. end
  1208. else
  1209. begin
  1210. inc(OutputUnicode);
  1211. PreChar:=IBYTE;
  1212. end;
  1213. end
  1214. else
  1215. begin
  1216. inc(OutputUnicode);
  1217. PreChar:=IBYTE;
  1218. end;
  1219. inc(InputUTF8);
  1220. end
  1221. else
  1222. begin
  1223. TempByte:=IBYTE;
  1224. CharLen:=0;
  1225. while (TempBYTE and $80)<>0 do
  1226. begin
  1227. TempBYTE:=(TempBYTE shl 1) and $FE;
  1228. inc(CharLen);
  1229. end;
  1230. //Test for the "CharLen" conforms UTF-8 string
  1231. //This means the 10xxxxxx pattern.
  1232. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1233. begin
  1234. //Insuficient chars in string to decode
  1235. //UTF-8 array. Fallback to single char.
  1236. CharLen:= 1;
  1237. end;
  1238. for LookAhead := 1 to CharLen-1 do
  1239. begin
  1240. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1241. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1242. begin
  1243. //Invalid UTF-8 sequence, fallback.
  1244. CharLen:= LookAhead;
  1245. break;
  1246. end;
  1247. end;
  1248. UC:=$FFFF;
  1249. case CharLen of
  1250. 1: begin
  1251. //Not valid UTF-8 sequence
  1252. UC:=UNICODE_INVALID;
  1253. end;
  1254. 2: begin
  1255. //Two bytes UTF, convert it
  1256. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1257. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1258. if UC <= $7F then
  1259. begin
  1260. //Invalid UTF sequence.
  1261. UC:=UNICODE_INVALID;
  1262. end;
  1263. end;
  1264. 3: begin
  1265. //Three bytes, convert it to unicode
  1266. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1267. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1268. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1269. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1270. begin
  1271. //Invalid UTF-8 sequence
  1272. UC:= UNICODE_INVALID;
  1273. end;
  1274. end;
  1275. 4: begin
  1276. //Four bytes, convert it to two unicode characters
  1277. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1278. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1279. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1280. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1281. if (UC < $10000) or (UC > $10FFFF) then
  1282. UC:= UNICODE_INVALID
  1283. else
  1284. { extra character character }
  1285. inc(OutputUnicode);
  1286. end;
  1287. 5,6,7: begin
  1288. //Invalid UTF8 to unicode conversion,
  1289. //mask it as invalid UNICODE too.
  1290. UC:=UNICODE_INVALID;
  1291. end;
  1292. end;
  1293. if CharLen > 0 then
  1294. begin
  1295. PreChar:=UC;
  1296. inc(OutputUnicode);
  1297. end;
  1298. InputUTF8:= InputUTF8 + CharLen;
  1299. end;
  1300. end;
  1301. Result:=OutputUnicode+1;
  1302. end;
  1303. end;
  1304. function UTF8Encode(const s : WideString) : RawByteString;
  1305. var
  1306. i : SizeInt;
  1307. hs : UTF8String;
  1308. begin
  1309. result:='';
  1310. if s='' then
  1311. exit;
  1312. SetLength(hs,length(s)*3);
  1313. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PWideChar(s),length(s));
  1314. if i>0 then
  1315. begin
  1316. SetLength(hs,i-1);
  1317. result:=hs;
  1318. end;
  1319. end;
  1320. const
  1321. SNoWidestrings = 'This binary has no widestrings support compiled in.';
  1322. SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';
  1323. procedure unimplementedwidestring;
  1324. begin
  1325. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  1326. If IsConsole then
  1327. begin
  1328. Writeln(StdErr,SNoWidestrings);
  1329. Writeln(StdErr,SRecompileWithWidestrings);
  1330. end;
  1331. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  1332. HandleErrorFrame(233,get_frame);
  1333. end;
  1334. {$warnings off}
  1335. function GenericWideCase(const s : WideString) : WideString;
  1336. begin
  1337. unimplementedwidestring;
  1338. end;
  1339. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  1340. begin
  1341. unimplementedwidestring;
  1342. end;
  1343. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  1344. begin
  1345. unimplementedwidestring;
  1346. end;
  1347. {$warnings on}
  1348. function DefaultCharLengthPChar(const Str: PChar): PtrInt;forward;
  1349. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;forward;
  1350. procedure initwidestringmanager;
  1351. begin
  1352. fillchar(widestringmanager,sizeof(widestringmanager),0);
  1353. {$ifndef HAS_WIDESTRINGMANAGER}
  1354. widestringmanager.Wide2AnsiMoveProc:=@DefaultWide2AnsiMove;
  1355. widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  1356. widestringmanager.UpperWideStringProc:=@GenericWideCase;
  1357. widestringmanager.LowerWideStringProc:=@GenericWideCase;
  1358. {$endif HAS_WIDESTRINGMANAGER}
  1359. widestringmanager.CompareWideStringProc:=@CompareWideString;
  1360. widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
  1361. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  1362. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  1363. end;