wstrings.inc 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for WideStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {
  13. This file contains the implementation of the WideString type,
  14. and all things that are needed for it.
  15. WideString is defined as a 'silent' pwidechar :
  16. a pwidechar that points to :
  17. (no reference count is stored in front of the string; TWideRec only holds the length)
  18. @-4 : DWord for size; size=number of bytes, not the number of chars. Divide or multiply
  19. with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
  20. Windows COM BSTR.
  21. @ : String + Terminating #0;
  22. Pwidechar(Widestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  { Header record placed directly in front of the character data of a
    heap-allocated WideString. }
  Type
    PWideRec = ^TWideRec;
    TWideRec = Packed Record
      Len   : DWord;      { payload size in bytes, not in characters }
      First : WideChar;   { first character of the string data }
    end;

  Const
    { Size of the header record including the First character slot. }
    WideRecLen   = SizeOf(TWideRec);
    { Offset of the First field, i.e. of the string data, inside the record. }
    WideFirstOff = WideRecLen-SizeOf(WideChar);
  36. {
  37. Default WideChar <-> Char conversion only maps code units below 256
  38. directly; all other wide characters are translated to '?'.
  39. These routines can be overwritten for the Current Locale
  40. }
  { Default WideChar -> Char conversion.
    Sizes dest to len characters and fills it from source: code units below
    256 are narrowed to one byte, every other code unit becomes '?'.
    The cp argument is not used by this default implementation. }
  procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;cp : TSystemCodePage;len:SizeInt);
  var
    remaining : SizeInt;
    outp      : PChar;
    code      : word;
  begin
    setlength(dest,len);
    outp:=PChar(Pointer(dest));
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  { Default Char -> WideChar conversion.
    Sizes dest to len characters and widens each source byte to the wide
    character with the same ordinal value.
    The cp argument is not used by this default implementation. }
  procedure DefaultAnsi2WideMove(source:pchar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
  var
    idx : SizeInt;
  begin
    setlength(dest,len);
    for idx:=0 to len-1 do
      dest[idx+1]:=widechar(byte(source[idx]));
  end;
  69. (*
  70. Procedure UniqueWideString(Var S : WideString); [Public,Alias : 'FPC_WIDESTR_UNIQUE'];
  71. {
  72. Make sure reference count of S is 1,
  73. using copy-on-write semantics.
  74. }
  75. begin
  76. end;
  77. *)
  78. {****************************************************************************
  79. Internal functions, not in interface.
  80. ****************************************************************************}
  { Reports a WideString failure: hands run-time error 204 together with the
    current stack frame to HandleErrorFrame. }
  procedure WideStringError;
  const
    WideStrErrorCode = 204;
  begin
    HandleErrorFrame(WideStrErrorCode,get_frame);
  end;
  {$ifdef WideStrDebug}
  { Debug helper: prints the header of the WideString whose character data
    starts at S, or a notice when S is nil.
    TWideRec has no reference count field, so only the byte length stored in
    the header is shown. }
  Procedure DumpWideRec(S : Pointer);
  begin
    If S=Nil then
      Writeln ('String is nil')
    Else
      Writeln ('(Len:',PWideRec(S-WideFirstOff)^.Len,')');
  end;
  {$endif}
  Function NewWideString(Len : SizeInt) : Pointer;
  {
    Allocates room for a WideString of Len characters and returns a pointer
    to its character data.
    With winwidestringalloc set (Windows only) the block comes from
    SysAllocStringLen; otherwise it comes from GetMem, the header length is
    set to Len characters expressed in bytes and the first character is
    cleared.
    Calls WideStringError when the allocation yields nil.
  }
  Var
    Mem : Pointer;
  begin
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        Mem:=SysAllocStringLen(nil,Len);
        if Mem=nil then
          WideStringError;
      end
    else
  {$endif MSWINDOWS}
      begin
        GetMem(Mem,WideRecLen+Len*sizeof(WideChar));
        if Mem=Nil then
          WideStringError
        else
          begin
            PWideRec(Mem)^.First:=#0;                   { Terminating #0 }
            PWideRec(Mem)^.Len:=Len*sizeof(WideChar);   { Length in bytes }
            inc(Mem,WideFirstOff);                      { Points to string now }
          end;
      end;
    NewWideString:=Mem;
  end;
  Procedure DisposeWideString(Var S : Pointer);
  {
    Releases the WideString whose character data starts at S and sets S to
    nil. A nil S is left untouched.
    With winwidestringalloc set (Windows only) the string is released through
    SysFreeString; otherwise the pointer is moved back to the start of the
    header record and released with Freemem.
  }
  begin
    if S<>Nil then
      begin
  {$ifdef MSWINDOWS}
        if winwidestringalloc then
          SysFreeString(S)
        else
  {$endif MSWINDOWS}
          begin
            Dec(S,WideFirstOff);
            Freemem(S);
          end;
        S:=Nil;
      end;
  end;
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
  {
    Releases the WideString referenced by S through DisposeWideString.
    Nothing happens when S is nil.
  }
  Begin
    if S<>Nil then
      DisposeWideString(S);
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
  Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
  {
    Replaces S by a freshly allocated copy of the WideString it points to.
    The previous block is not released here. A nil S is left untouched.
  }
  var
    Dup   : pointer;
    Chars : SizeInt;
  Begin
    if S<>Nil then
      begin
        Chars:=length(WideString(S));
        Dup:=NewWidestring(Chars);
        { copy the characters together with the terminating #0 }
        move(S^,Dup^,(Chars+1)*sizeof(widechar));
        S:=Dup;
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
  173. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WideStr_To_ShortStr (high_of_res: SizeInt;const S2 : WideString): shortstring;[Public, alias: 'FPC_WIDESTR_TO_SHORTSTR']; compilerproc;
  {
    Converts a WideString to a ShortString.
    At most high_of_res wide characters of S2 are converted; an empty S2
    gives an empty result.
  }
  Var
    Size : SizeInt;
    temp : ansistring;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        If Size>high_of_res then
          Size:=high_of_res;
        { the conversion routine expects the target code page as its third
          argument, exactly as in the other conversions of this file }
        widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  192. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts the WideString S2 into the ShortString res using the default
    system code page. At most high(res) wide characters are converted; an
    empty S2 gives an empty res.
  }
  Var
    Chars  : SizeInt;
    Narrow : ansistring;
  begin
    res:='';
    Chars:=Length(S2);
    if Chars<=0 then
      exit;
    if Chars>high(res) then
      Chars:=high(res);
    widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),Narrow,DefaultSystemCodePage,Chars);
    res:=Narrow;
  end;
  211. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
  {
    Converts a ShortString to a WideString using the default system code
    page. An empty S2 gives an empty result.
  }
  Var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    { No terminator is written by hand here: the conversion routine sizes
      the result with SetLength, which already terminates it, and the
      converted length need not equal Size, so writing at offset Size could
      land outside the allocated string. }
    if Size>0 then
      widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  end;
  Function fpc_WideStr_To_AnsiStr (const S2 : WideString;cp : TSystemCodePage): AnsiString; compilerproc;
  {
    Converts the WideString S2 to an AnsiString, passing code page cp on to
    the installed conversion routine. An empty S2 gives an empty result.
  }
  Var
    Chars : SizeInt;
  begin
    result:='';
    Chars:=Length(S2);
    if Chars<=0 then
      exit;
    widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,cp,Chars);
  end;
  Function fpc_AnsiStr_To_WideStr (Const S2 : AnsiString): WideString; compilerproc;
  {
    Converts the AnsiString S2 to a WideString, passing the code page
    reported by StringCodePage(S2) on to the installed conversion routine.
    An empty S2 gives an empty result.
  }
  Var
    Chars : SizeInt;
  begin
    result:='';
    Chars:=Length(S2);
    if Chars<=0 then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(S2),StringCodePage(S2),result,Chars);
  end;
  Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
  {
    Builds a WideString from the zero-terminated wide character sequence at
    p. A nil p gives an empty result.
  }
  var
    Chars : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        { number of characters in front of the first zero word }
        Chars:=IndexWord(p^, -1, 0);
        Setlength(result,Chars);
        if Chars>0 then
          begin
            Move(p^,Pointer(result)^,Chars*sizeof(WideChar));
            { Terminating Zero }
            PWideChar(Pointer(result))[Chars]:=#0;
          end;
      end;
  end;
  268. { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
  {
    Performs S1:=S2 by copying the characters of S2 into S1.
    Nothing happens when both pointers are equal; a nil S2 releases S1 and
    leaves it nil.
  }
  begin
    if S1=S2 then
      exit;
    if S2=nil then
      begin
        { Free S1 }
        fpc_widestr_decr_ref (S1);
        S1:=nil;
        exit;
      end;
  {$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        if SysReAllocStringLen(S1, S2, Length(WideString(S2))) = 0 then
          WideStringError;
      end
    else
  {$endif MSWINDOWS}
      begin
        SetLength(WideString(S1),length(WideString(S2)));
        { copy the characters together with the terminating #0 }
        move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar));
      end;
  end;

  { alias for internal use }
  Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
  299. {$ifndef STR_CONCAT_PROCS}
  function fpc_WideStr_Concat (const S1,S2 : WideString): WideString; compilerproc;
  {
    Returns S1 followed by S2. When one operand is empty the other one is
    simply assigned to the result.
  }
  Var
    Len1,Len2 : SizeInt;
    dst       : pwidechar;
  begin
    if (S1='') then
      result:=s2
    else if (S2='') then
      result:=s1
    else
      begin
        Len1:=Length(S1);
        Len2:=length(S2);
        SetLength(result,Len1+Len2);
        dst:=pwidechar(result);
        Move(S1[1],dst^,Len1*sizeof(WideChar));
        { the second part is copied together with its terminating #0 }
        Move(S2[1],dst[Len1],(Len2+1)*sizeof(WideChar));
      end;
  end;
  function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring; compilerproc;
  {
    Returns the concatenation of all strings in sarr. The total length is
    computed first so that the result is sized by one SetLength call.
  }
  Var
    idx         : Longint;
    src         : pointer;
    dst         : pwidechar;
    Chars,Total : SizeInt;
  begin
    Total:=0;
    for idx:=low(sarr) to high(sarr) do
      Total:=Total+length(sarr[idx]);
    SetLength(result,Total);
    dst:=pwidechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        { empty elements are nil and contribute nothing }
        if not assigned(src) then
          continue;
        Chars:=length(widestring(src));
        { each part is copied with its terminating #0; the next part overwrites it }
        Move(pwidechar(src)^,dst^,(Chars+1)*sizeof(WideChar));
        inc(dst,Chars);
      end;
  end;
  349. {$else STR_CONCAT_PROCS}
  procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc;
  {
    Stores S1 followed by S2 in DestS. When one operand is empty the other
    one is simply assigned. The cases where DestS shares its pointer with S1
    and/or S2 are handled separately, because resizing DestS then also
    affects the operand.
  }
  Var
    Len1,Len2 : SizeInt;
    SelfTwice : boolean;
    Tail      : Pointer;
  begin
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=length(S2);
    { Pointer() typecasts are used to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS := DestS + S2: grow in place and append }
        SelfTwice:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        if SelfTwice then
          { S2 was the same string: take the characters from the resized DestS }
          Move(Pointer(DestS)^,Tail^,Len2*sizeof(WideChar))
        else
          Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(WideChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS := S1 + DestS: grow, shift the old contents back, then prepend S1 }
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        Move(Pointer(DestS)^,Tail^,(Len2+1)*sizeof(WideChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
      end
    else
      begin
        { DestS is unrelated to both operands: start from scratch }
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(WideChar);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(WideChar));
        Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(WideChar));
      end;
  end;
  procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
  {
    Stores the concatenation of all strings in sarr in DestS.
    The result is built in a temporary string and assigned to DestS only at
    the end, so DestS is not modified while the elements of sarr are read.
  }
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    DestTmp     : Widestring;
  begin
    { Only an empty array yields the empty string. A single element
      (high(sarr)=0) goes through the general path below and is copied, so
      the result equals that element instead of being discarded. }
    if high(sarr)<0 then
      begin
        DestS:='';
        exit;
      end;
    { First calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestTmp,NewLen);
    pc:=pwidechar(DestTmp);
    for i:=low(sarr) to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(widestring(p));
            { copy the part with its terminating #0; the next part overwrites it }
            Move(p^,pc^,(Size+1)*sizeof(WideChar));
            { pc is an untyped pointer, so it advances in bytes }
            inc(pc,size*sizeof(WideChar));
          end;
      end;
    DestS:=DestTmp;
  end;
  423. {$endif STR_CONCAT_PROCS}
  Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
  {
    Returns a WideString of length 1 that holds the character c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
    { Terminating Zero }
    PWideChar(Pointer(result))[1]:=#0;
  end;
  Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  {
    Returns a WideString of length 1 that holds the wide character c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  Function fpc_WChar_To_AnsiStr(const c : WideChar;cp : TSystemCodePage): AnsiString; compilerproc;
  {
    Converts the single wide character c to an AnsiString by handing it,
    with code page cp, to the installed conversion routine.
  }
  begin
    widestringmanager.Wide2AnsiMoveProc(@c, result, cp, 1);
  end;
  Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
  {
    Returns a WideString of length 1 that holds the wide character c.
  }
  begin
    Setlength(result,1);
    result[1]:=c;
  end;
  Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
  {
    Converts the zero-terminated character sequence at p to a WideString
    using the default system code page. A nil p or an empty sequence gives
    an empty result.
  }
  Var
    Chars : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { number of characters in front of the terminating #0 }
        Chars:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2WideMoveProc(P,DefaultSystemCodePage,result,Chars);
      end
    else
      result:='';
  end;
  Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
  {
    Converts a character array to a WideString using the default system
    code page.
    With zerobased set, conversion stops at the first #0 in arr (the whole
    array is used when there is none); otherwise the whole array is
    converted. An empty array gives an empty result.
  }
  var
    i : SizeInt;
  begin
    { an empty open array has no element 0 that could be inspected }
    if high(arr)<0 then
      begin
        fpc_chararray_to_widestr := '';
        exit;
      end;
    if (zerobased) then
      begin
        if (arr[0]=#0) Then
          begin
            fpc_chararray_to_widestr := '';
            exit;
          end;
        i:=IndexChar(arr,high(arr)+1,#0);
        if i = -1 then
          i := high(arr)+1;
      end
    else
      i := high(arr)+1;
    SetLength(fpc_CharArray_To_WideStr,i);
    widestringmanager.Ansi2WideMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_WideStr,i);
  end;
  489. {$ifndef FPC_STRTOCHARARRAYPROC}
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM) }
  { Converts src to single-byte characters using the default system code   }
  { page, copies at most arraysize of them into the result and fills the   }
  { remaining elements with zero bytes.                                    }
  function fpc_widestr_to_chararray(arraysize: SizeInt; const src: WideString): fpc_big_chararray;[public,alias: 'FPC_WIDESTR_TO_CHARARRAY']; compilerproc;
  var
    len: SizeInt;
    temp: ansistring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { the conversion routine expects the target code page as its third argument }
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,DefaultSystemCodePage,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_widestr_to_chararray[0],len);
    fillchar(fpc_widestr_to_chararray[len],arraysize-len,0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { widechararray we're converting to (JM) }
  { Copies at most arraysize wide characters of src into the result and   }
  { fills the remaining elements with zero bytes.                         }
  function fpc_widestr_to_widechararray(arraysize: SizeInt; const src: WideString): fpc_big_widechararray;[public,alias: 'FPC_WIDESTR_TO_WIDECHARARRAY']; compilerproc;
  var
    Chars: SizeInt;
  begin
    Chars := arraysize;
    if length(src) <= Chars then
      Chars := length(src);
  {$r-}
    { element 1 of the widestring must not be accessed when it is nil }
    if Chars > 0 then
      move(src[1],result[0],Chars*SizeOf(WideChar));
    fillchar(result[Chars],(arraysize-Chars)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM) }
  { Converts src to wide characters, copies at most arraysize of them     }
  { into the result and fills the remaining elements with zero bytes.     }
  function fpc_ansistr_to_widechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_widechararray;[public,alias: 'FPC_ANSISTR_TO_WIDECHARARRAY']; compilerproc;
  var
    len: SizeInt;
    temp: widestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { the conversion routine expects the source code page as its second
        argument; use the one of src, as fpc_AnsiStr_To_WideStr does }
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_ansistr_to_widechararray[0],len*sizeof(widechar));
    fillchar(fpc_ansistr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { Converts src to wide characters using the default system code page,   }
  { copies at most arraysize of them into the result and fills the        }
  { remaining elements with zero bytes.                                   }
  function fpc_shortstr_to_widechararray(arraysize: SizeInt; const src: ShortString): fpc_big_widechararray;[public,alias: 'FPC_SHORTSTR_TO_WIDECHARARRAY']; compilerproc;
  var
    len: longint;
    temp : widestring;
  begin
    len := length(src);
    { make sure we don't access char 1 if length is 0 (JM) }
    if len > 0 then
      { the conversion routine expects the source code page as its second
        argument, as in fpc_ShortStr_To_WideStr }
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_shortstr_to_widechararray[0],len*sizeof(widechar));
    fillchar(fpc_shortstr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  569. {$else ndef FPC_STRTOCHARARRAYPROC}
  { Converts src to single-byte characters using the default system code  }
  { page, copies as many of them as fit into res and fills the remaining  }
  { elements of res with zero bytes.                                      }
  procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc;
  var
    Count,Capacity: SizeInt;
    Narrow: ansistring;
  begin
    Count := length(src);
    { src must not be dereferenced when it can be nil (JM) }
    if Count > 0 then
      widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),Narrow,DefaultSystemCodePage,Count);
    Capacity := length(res);
    Count := length(Narrow);
    if Count > Capacity then
      Count := Capacity;
  {$r-}
    move(Narrow[1],res[0],Count);
    fillchar(res[Count],Capacity-Count,0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { Copies as many wide characters of src as fit into res and fills the   }
  { remaining elements of res with zero bytes.                            }
  procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc;
  var
    Count,Capacity: SizeInt;
  begin
    Capacity := length(res);
    Count := length(src);
    if Count > Capacity then
      Count := Capacity;
  {$r-}
    { element 1 of the widestring must not be accessed when it is nil }
    if Count > 0 then
      move(src[1],res[0],Count*SizeOf(WideChar));
    fillchar(res[Count],(Capacity-Count)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  605. {$endif ndef FPC_STRTOCHARARRAYPROC}
  Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
  {
    Compares 2 WideStrings word by word over their common length; when that
    part is equal the difference of the lengths decides.
    The result is
     <0 if S1<S2
     0 if S1=S2
     >0 if S1>S2
  }
  Var
    Len1,Len2,Common : SizeInt;
  begin
    { identical pointers mean identical strings }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1>Len2 then
      Common:=Len2
    else
      Common:=Len1;
    result:=CompareWord(S1[1],S2[1],Common);
    if result=0 then
      result:=Len1-Len2;
  end;
  Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
  {
    Compares 2 WideStrings for equality only; strings of different length
    are reported as unequal without looking at their contents.
    The result is
     0 if S1=S2
     <>0 if S1<>S2
  }
  begin
    if pointer(S1)=pointer(S2) then
      result:=0
    else if Length(S1)<>Length(S2) then
      result:=-1
    else
      result:=CompareWord(S1[1],S2[1],Length(S1));
  end;
  649. {$ifdef VER2_4}
  // obsolete but needed for bootstrapping with 2.4
  // Raises run-time error 201 when p is nil.
  Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc;
  begin
    if p<>nil then
      exit;
    HandleErrorFrame(201,get_frame);
  end;
  { Raises run-time error 201 unless index lies in 1..(len div 2). }
  Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  begin
    if (Index<1) or (index>len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  661. {$else VER2_4}
  { Raises run-time error 201 when p is nil or when index lies outside
    1..(stored byte length div 2) of the WideString at p. }
  Procedure fpc_WideStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
  begin
    { the nil test has to come first: the header is only read for a non-nil p }
    if (p=nil) or (Index<1) or (index>PWideRec(p-WideFirstOff)^.len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  667. {$endif VER2_4}
  Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of string S to l characters.
    l<=0 releases S and leaves it nil. Otherwise S gets room for l
    characters, is zero-terminated at position l and, unless it was
    allocated through the Windows routines, has its header length updated.
  }
  Var
    Fresh     : Pointer;
    CopyChars : SizeInt;
    Needed    : SizeInt;
  begin
    if l<=0 then
      begin
        { Length=0 }
        if Pointer(S)<>nil then
          fpc_widestr_decr_ref (Pointer(S));
        Pointer(S):=Nil;
        exit;
      end;
    if Pointer(S)=nil then
      { Need a complete new string...}
      Pointer(S):=NewWideString(l)
    else
  {$ifdef MSWINDOWS}
    { windows doesn't support reallocing widestrings: allocate a new one,
      copy the old contents over and release the old string }
    if winwidestringalloc then
      begin
        Fresh:=Pointer(NewWideString(l));
        if Length(S)>0 then
          begin
            { copy the old characters and, if it fits, the terminating null }
            CopyChars:=succ(length(S));
            if l<CopyChars then
              CopyChars:=l;
            Move(Pointer(S)^,Fresh^,CopyChars*Sizeof(WideChar));
          end;
        fpc_widestr_decr_ref(Pointer(S));
        Pointer(S):=Fresh;
      end
    else
  {$endif MSWINDOWS}
      begin
        { step back to the start of the header record, grow the block when
          it is too small, then point at the characters again }
        Needed:=l*sizeof(WideChar)+WideRecLen;
        Dec(Pointer(S),WideFirstOff);
        if SizeUInt(Needed)>MemSize(Pointer(S)) then
          reallocmem(pointer(S),Needed);
        Inc(Pointer(S),WideFirstOff);
      end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
  {$ifdef MSWINDOWS}
    if not winwidestringalloc then
  {$endif MSWINDOWS}
      PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar);
  end;
  731. {*****************************************************************************
  732. Public functions, In interface.
  733. *****************************************************************************}
  { Returns the pointer held in S unchanged; no copy is made. }
  Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc;
  begin
    result:=S;
  end;
  { Returns the part of S that starts at character Index (1-based; values
    below 1 are treated as 1) and is Size characters long. Size is cut down
    to what is available after Index; a non-positive remaining size gives an
    empty result. }
  Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc;
  var
    Fresh  : Pointer;
    SrcLen : SizeInt;
  begin
    Fresh:=Nil;
    { switch to a zero-based start position }
    dec(index);
    if Index<0 then
      Index:=0;
    SrcLen:=Length(S);
    { Check Size. Accounts for Zero-length S, the double check is needed because
      Size can be maxint and will get <0 when adding index }
    if (Size>SrcLen) or
       (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        Fresh:=Pointer(NewWideString (Size));
        if Fresh<>Nil then
          begin
            Move (PWideChar(S)[Index],Fresh^,Size*sizeof(WideChar));
            PWideRec(Fresh-WideFirstOff)^.Len:=Size*sizeof(WideChar);
            PWideChar(Fresh+Size*sizeof(WideChar))^:=#0;
          end;
      end;
    { release whatever the result held before taking over the new string }
    fpc_widestr_decr_ref(Pointer(result));
    Pointer(result):=Fresh;
  end;
  { Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when Substr is empty or does not occur. }
  Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt;
  var
    Start,LastStart,SubLen : SizeInt;
    cur : pwidechar;
  begin
    result:=0;
    SubLen:=Length(SubStr);
    if SubLen>0 then
      begin
        { zero-based index of the last position where Substr still fits }
        LastStart:=Length(source)-SubLen;
        cur:=@source[1];
        for Start:=1 to LastStart+1 do
          begin
            { cheap first-character test before the full comparison }
            if (SubStr[1]=cur^) and
               (CompareWord(Substr[1],cur^,SubLen)=0) then
              begin
                result:=Start;
                exit;
              end;
            inc(cur);
          end;
      end;
  end;
  { Faster version for a widechar alone: returns the 1-based position of
    the first occurrence of c in s, or 0 when c does not occur. }
  Function Pos (c : WideChar; Const s : WideString) : SizeInt;
  var
    idx : SizeInt;
    cur : pwidechar;
  begin
    result:=0;
    cur:=@s[1];
    for idx:=1 to length(s) do
      begin
        if cur^=c then
          begin
            result:=idx;
            break;
          end;
        inc(cur);
      end;
  end;
  808. Function Pos (c : WideChar; Const s : AnsiString) : SizeInt;
  809. begin
  810. result:=Pos(c,WideString(s));
  811. end;
  { Searches a WideString for an AnsiString pattern; the pattern is
    converted to WideString and the wide/wide overload does the work. }
  Function Pos (c : AnsiString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  var
    Needle : WideString;
  begin
    Needle:=WideString(c);
    Result:=Pos(Needle,s);
  end;
  { Searches a WideString for a ShortString pattern; the pattern is
    converted to WideString and the wide/wide overload does the work. }
  Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  var
    Needle : WideString;
  begin
    Needle:=WideString(c);
    Result:=Pos(Needle,s);
  end;
  { Searches an AnsiString for a WideString pattern; the searched string
    is converted to WideString and the wide/wide overload does the work. }
  Function Pos (c : WideString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  var
    Haystack : WideString;
  begin
    Haystack:=WideString(s);
    Result:=Pos(c,Haystack);
  end;
  { Single-char search in a WideString. This overload has to exist:       }
  { pos(c: char; const s: shortstring) is also declared, and without an   }
  { exact match on the first argument that shortstring version would be   }
  { selected, also with $h+ (JM).                                         }
  { Returns the 1-based index of the first match, or 0 when there is none. }
  Function Pos (c : Char; Const s : WideString) : SizeInt;
  var
    Idx, Count : SizeInt;
    Needle : WideChar;
    Chars : PWideChar;
  begin
    Result:=0;
    { Widen the char once, outside the loop. }
    Needle:=c;
    Chars:=@s[1];
    Count:=Length(s);
    Idx:=0;
    while Idx<Count do
      begin
        if Chars[Idx]=Needle then
          begin
            Result:=Idx+1;
            exit;
          end;
        inc(Idx);
      end;
  end;
  { Removes up to Size characters from S starting at the 1-based position
    Index. Nothing happens when Index lies outside 1..Length(S) or when
    Size<=0. A Size reaching past the end deletes up to the end of S. }
  Procedure Delete (Var S : WideString; Index,Size: SizeInt);
  Var
    OldLen, Tail : SizeInt;
    Chars : PWideChar;
  begin
    OldLen:=Length(S);
    if (Size<=0) or (Index<=0) or (Index>OldLen) then
      exit;
    UniqueString(S);
    { Number of characters that follow position Index. Comparing against
      this avoids computing Size+Index, which overflows for Size=MaxInt. }
    Tail:=OldLen-Index;
    if Size>Tail then
      { Deleting through the end: nothing to shift, just truncate. }
      Size:=Tail+1
    else
      begin
        Chars:=PWideChar(S);
        { Shift the remainder down over the deleted range; the extra
          element moved is the one directly after the last character. }
        Move(Chars[Index-1+Size],Chars[Index-1],(Tail-Size+2)*sizeof(WideChar));
      end;
    SetLength(S,OldLen-Size);
  end;
  { Inserts Source into S in front of the 1-based position Index.
    Index values <=0 insert at the start, values beyond the end append.
    An empty Source leaves S untouched. }
  Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt);
  var
    Merged : WideString;
    SrcLen, DstLen, Head, Tail : SizeInt;
  begin
    SrcLen:=Length(Source);
    if SrcLen=0 then
      exit;
    DstLen:=Length(S);
    { Head = number of characters of S that stay in front of the insertion
      point (the clamped, 0-based insertion offset). }
    if Index<=0 then
      Head:=0
    else if Index>DstLen then
      Head:=DstLen
    else
      Head:=Index-1;
    Tail:=DstLen-Head;
    Pointer(Merged):=NewWideString(SrcLen+DstLen);
    SetLength(Merged,SrcLen+DstLen);
    { leading part of S }
    if Head>0 then
      Move(PWideChar(S)^,PWideChar(Merged)^,Head*sizeof(WideChar));
    { inserted text }
    Move(PWideChar(Source)^,PWideChar(Merged)[Head],SrcLen*sizeof(WideChar));
    { trailing part of S }
    if Tail>0 then
      Move(PWideChar(S)[Head],PWideChar(Merged)[SrcLen+Head],Tail*sizeof(WideChar));
    S:=Merged;
  end;
  { Returns s converted by the UpperWideStringProc hook of the installed
    widestring manager. }
  function UpCase(const s : WideString) : WideString;
  begin
    UpCase:=widestringmanager.UpperWideStringProc(s);
  end;
  { Sets S to length Len and, when Buf is non-nil and Len is positive,
    copies Len widechars from Buf into it. }
  Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    { Nothing to copy for a missing buffer or a non-positive length. }
    if (Len<=0) or (Buf=Nil) then
      exit;
    Move(Buf^,S[1],Len*sizeof(WideChar));
  end;
  { Sets S from a single-byte buffer: up to Len bytes of Buf are converted
    through the widestring manager's Ansi2WideMoveProc using
    DefaultSystemCodePage.
    If a #0 byte is found at an offset in 1..Len-1, only the bytes in front
    of it are converted. A #0 at offset 0 does not shorten the input.
    When Buf is nil or Len<=0, S is only resized with SetLength. }
  Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt);
  var
    { Offset of the first #0 byte inside Buf, or -1 when there is none.
      This local was used without being declared. }
    BufLen : SizeInt;
  begin
    SetLength(S,Len);
    If (Buf<>Nil) and (Len>0) then
      begin
        { Only the first Len bytes are scanned: a terminator is honoured
          only when it lies strictly before Len, so looking at byte Len
          (one past the caller-supplied size) can never change the result. }
        BufLen := IndexByte(Buf^, Len, 0);
        If (BufLen>0) and (BufLen < Len) then
          Len := BufLen;
        widestringmanager.Ansi2WideMoveProc(Buf,DefaultSystemCodePage,S,Len);
      end;
  end;
  909. {$ifndef FPUNONE}
  { Val() helper for WideString -> real. Inputs longer than 255 characters
    are rejected with Code=256 and a result of 0; anything else is copied
    to a short string and parsed by the regular Val. }
  Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    if Length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      begin
        Result:=0;
        Code:=256;
      end;
  end;
  923. {$endif}
  { Val() helper for WideString -> enumeration ordinal.
    Inputs longer than 255 characters are rejected with Code=256; shorter
    ones are copied to a shortstring and handed to Val.
    The result is now initialised to 0 so that it is defined on the
    rejection path too, as in the sibling fpc_Val_*_WideStr helpers.
    NOTE(review): str2ordindex is not used and the text is parsed by the
    plain Val intrinsic into a longint - confirm whether a name lookup
    through str2ordindex was intended. }
  function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
  var ss:shortstring;
  begin
    fpc_val_enum_widestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_widestr,code);
      end;
  end;
  { Val() helper for WideString -> Currency. Inputs longer than 255
    characters yield 0 with Code=256; anything else is copied to a short
    string and parsed by the regular Val. }
  Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
  Var
    Narrow : String;
  begin
    if Length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      begin
        Code:=256;
        Result:=0;
      end;
  end;
  { Val() helper for WideString -> unsigned integer. Inputs longer than
    255 characters yield 0 with Code=256; anything else is copied to a
    shortstring and parsed by the regular Val. }
  Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,Result,Code);
  end;
  { Val() helper for WideString -> signed integer. Inputs longer than 255
    characters yield 0 with Code=256; anything else is copied to a
    shortstring and passed, together with DestSize, to
    int_Val_SInt_ShortStr. }
  Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=S;
    Result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
  end;
  976. {$ifndef CPU64}
  { Val() helper for WideString -> qword. Inputs longer than 255
    characters yield 0 with Code=256; anything else is copied to a
    shortstring and parsed by the regular Val. }
  Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,Result,Code);
  end;
  { Val() helper for WideString -> Int64. Inputs longer than 255
    characters yield 0 with Code=256; anything else is copied to a
    shortstring and parsed by the regular Val. }
  Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=S;
    Val(Narrow,Result,Code);
  end;
  1003. {$endif CPU64}
  1004. {$ifndef FPUNONE}
  { Str() helper for real -> WideString: formats d into a shortstring via
    str_real (len, fr and rt are passed through, rt as treal_type) and
    assigns the text to s. }
  procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Formatted);
    s:=Formatted;
  end;
  1012. {$endif}
  { Str() helper for enumeration -> WideString: lets fpc_shortstr_enum
    produce the text in a shortstring and assigns that to s. }
  procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
    s:=Formatted;
  end;
  { Str() helper for boolean -> WideString: lets fpc_shortstr_bool produce
    the text in a shortstring and assigns that to s. }
  procedure fpc_widestr_bool(b : boolean;len:sizeint;out s:widestring);compilerproc;
  var
    Formatted : shortstring;
  begin
    fpc_shortstr_bool(b,len,Formatted);
    s:=Formatted;
  end;
  1025. {$ifdef FPC_HAS_STR_CURRENCY}
  { Str() helper for Currency -> WideString: formats c with width len and
    fr decimals into a shortstring and assigns that to s. }
  procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
  var
    Formatted : shortstring;
  begin
    Str(c:len:fr,Formatted);
    s:=Formatted;
  end;
  1033. {$endif FPC_HAS_STR_CURRENCY}
  { Str() helper for signed integer -> WideString: formats v with field
    width Len into a shortstring and assigns that to S. }
  Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
  Var
    Digits : ShortString;
  begin
    Str(v:Len,Digits);
    S:=Digits;
  end;
  { Str() helper for unsigned integer -> WideString: formats v with field
    width Len into a shortstring and assigns that to S. }
  Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
  Var
    Digits : ShortString;
  begin
    Str(v:Len,Digits);
    S:=Digits;
  end;
  1048. {$ifndef CPU64}
  { Str() helper for Int64 -> WideString: formats v with field width Len
    into a shortstring and assigns that to S. }
  Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
  Var
    Digits : ShortString;
  begin
    Str(v:Len,Digits);
    S:=Digits;
  end;
  { Str() helper for Qword -> WideString: formats v with field width Len
    into a shortstring and assigns that to S. }
  Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
  Var
    Digits : ShortString;
  begin
    Str(v:Len,Digits);
    S:=Digits;
  end;
  1063. {$endif CPU64}
  { Converts the UTF-16 code unit at S[index], or the surrogate pair that
    starts there, to a UTF-32 value.
    len receives the number of UTF-16 code units consumed: 2 for a valid
    surrogate pair, 1 otherwise. A surrogate that is not part of a valid
    pair is returned unchanged with len=1. }
  function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
  var
    Lead, Trail: widechar;
  begin
    Lead:=S[index];
    { Default outcome: the code unit maps to itself. This covers the ranges
      #$0-#$D7FF and #$E000-#$FFFF as well as unpaired surrogates. }
    Result:=UCS4Char(Lead);
    len:=1;
    { Only a high surrogate that still has a successor can start a pair. }
    if (Lead>=#$d800) and (Lead<=#$dbff) and (index<Length(S)) then
      begin
        Trail:=S[index+1];
        if (Trail>=#$dc00) and (Trail<=#$dfff) then
          begin
            { combine the two 10-bit halves and add the plane offset }
            Result:=((UCS4Char(Lead)-$d800) shl 10) + (UCS4Char(Trail)-$dc00) + $10000;
            len:=2;
          end;
      end;
  end;
  { Convenience overload for #0-terminated input: determines the number of
    widechars in front of the terminator with IndexWord and forwards to the
    counted overload. Returns 0 when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source=nil then
      exit;
    Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
  end;
  { Converts SourceChars UTF-16 code units from Source to UTF-8.

    Dest<>nil: at most MaxDestBytes bytes are written to Dest, the output is
               #0-terminated and the result is the terminator's index + 1.
    Dest=nil : nothing is written; the result is the number of bytes the
               converted text needs, plus 1.
    Source=nil yields 0.

    High surrogates not followed by a low surrogate, and stray low
    surrogates, produce no output.

    Changes compared to the previous implementation:
    - MaxDestBytes=0 no longer writes the terminator to Dest[0]
      (MaxDestBytes-1 wrapped around, so the clamp did not trigger and one
      byte was stored outside the zero-sized buffer). The return value is
      unchanged.
    - A surrogate pair is combined arithmetically instead of building a
      temporary WideString for utf16toutf32; the value is the same.

    NOTE(review): when the output fills Dest completely, the terminator
    overwrites the last byte written, which can cut a multi-byte sequence
    in half. That behaviour is kept as is - confirm whether callers rely
    on it. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
  var
    i,j : SizeUInt;
    w : word;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            w:=word(Source[i]);
            case w of
              0..$7f:
                begin
                  Dest[j]:=char(w);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  { stop when the two bytes do not fit anymore }
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (w shr 6));
                  Dest[j+1]:=char($80 or (w and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  { stop when the three bytes do not fit anymore }
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (w shr 12));
                  Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                  Dest[j+2]:=char($80 or (w and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  { stop when the four bytes do not fit anymore }
                  if j+3>=MaxDestBytes then
                    break;
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { Both halves were validated above, so the pair can be
                        combined directly into the UTF-32 value. }
                      lw:=((longword(w)-$d800) shl 10) +
                          (longword(word(Source[i+1]))-$dc00) + $10000;
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { skip the low surrogate as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { Terminate the output, but only if the buffer has room for at
          least one byte. }
        if MaxDestBytes>0 then
          begin
            if j>SizeUInt(MaxDestBytes-1) then
              j:=MaxDestBytes-1;
            Dest[j]:=#0;
          end;
      end
    else
      begin
        { Counting pass: same classification as above, nothing is stored. }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    result:=j+1;
  end;
  { Convenience overload for #0-terminated input: measures Source with
    strlen and forwards to the counted overload. Returns 0 when Source is
    nil. }
  function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source=nil then
      exit;
    Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
  end;
  { Converts SourceBytes bytes of UTF-8 from Source to UTF-16.

    Dest<>nil: at most MaxDestChars widechars are stored in Dest. The
               result is the number of widechars stored, plus 1. No
               terminator is written by this routine.
    Dest=nil : nothing is stored; the result is the number of widechars the
               whole input converts to, plus 1.
    Source=nil yields 0.

    Invalid input (stray continuation bytes, overlong forms, encoded
    surrogates, $FFFE/$FFFF, sequences of 5-7 bytes, values above $10FFFF)
    is replaced by UNICODE_INVALID ('?'). A code point above $FFFF is
    stored as a surrogate pair; if only one widechar of room is left, it is
    skipped entirely.

    Changes compared to the previous implementation:
    - The check for a sequence that is cut off by the end of the input used
      '>' instead of '>='. A sequence whose last byte would sit at index
      SourceBytes therefore passed the check, the byte behind the input was
      read and the shortened sequence was decoded as if it were valid. Such
      sequences now fall back to one '?' per byte without touching memory
      outside Source[0..SourceBytes-1].
    - The LF->CRLF expansion that was disabled with "and FALSE" has been
      removed together with the PreChar variable that only it read, and so
      has the initial result:=SizeUInt(-1), which was always overwritten. }
  function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  const
    UNICODE_INVALID=63;
  var
    InputUTF8: SizeUInt;
    IBYTE: BYTE;
    OutputUnicode: SizeUInt;
    TempBYTE: BYTE;
    CharLen: SizeUint;
    LookAhead: SizeUInt;
    UC: SizeUInt;
  begin
    if not assigned(Source) then
      begin
        result:=0;
        exit;
      end;
    InputUTF8:=0;
    OutputUnicode:=0;
    if Assigned(Dest) Then
      begin
        while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80) = 0 then
              begin
                //One character US-ASCII, store it unchanged
                Dest[OutputUnicode]:=WideChar(IBYTE);
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                //Count the leading 1 bits: the announced sequence length
                TempByte:=IBYTE;
                CharLen:=0;
                while (TempBYTE and $80)<>0 do
                  begin
                    TempBYTE:=(TempBYTE shl 1) and $FE;
                    inc(CharLen);
                  end;
                //The last byte of the sequence must still lie inside the input
                if SizeUInt(InputUTF8+CharLen-1)>=SourceBytes then
                  begin
                    //Insufficient bytes left to decode the sequence.
                    //Fall back to a single char.
                    CharLen:= 1;
                  end;
                //All following bytes must match the 10xxxxxx pattern
                for LookAhead := 1 to CharLen-1 do
                  begin
                    if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
                       ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
                      begin
                        //Invalid UTF-8 sequence, fall back to the valid prefix length
                        CharLen:= LookAhead;
                        break;
                      end;
                  end;
                UC:=$FFFF;
                case CharLen of
                  1:  begin
                        //Not a valid UTF-8 sequence
                        UC:=UNICODE_INVALID;
                      end;
                  2:  begin
                        //Two bytes UTF, convert it
                        UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                        UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                        if UC <= $7F then
                          begin
                            //Overlong encoding
                            UC:=UNICODE_INVALID;
                          end;
                      end;
                  3:  begin
                        //Three bytes, convert it to unicode
                        UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                        if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                          begin
                            //Overlong encoding, $FFFE/$FFFF or an encoded surrogate
                            UC:= UNICODE_INVALID;
                          end;
                      end;
                  4:  begin
                        //Four bytes, convert it to two unicode characters
                        UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                        if (UC < $10000) or (UC > $10FFFF) then
                          begin
                            UC:= UNICODE_INVALID;
                          end
                        else
                          begin
                            { only store pair if room }
                            dec(UC,$10000);
                            if (OutputUnicode<MaxDestChars-1) then
                              begin
                                { high surrogate now, low surrogate through the
                                  common store below }
                                Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                                inc(OutputUnicode);
                                UC:=(UC and $3ff) + $DC00;
                              end
                            else
                              begin
                                { consume the input here and ... }
                                InputUTF8:= InputUTF8 + CharLen;
                                { ... don't store anything }
                                CharLen:=0;
                              end;
                          end;
                      end;
                  5,6,7:  begin
                            //Invalid UTF8 to unicode conversion,
                            //mask it as invalid UNICODE too.
                            UC:=UNICODE_INVALID;
                          end;
                end;
                if CharLen > 0 then
                  begin
                    Dest[OutputUnicode]:=WideChar(UC);
                    inc(OutputUnicode);
                  end;
                InputUTF8:= InputUTF8 + CharLen;
              end;
          end;
        Result:=OutputUnicode+1;
      end
    else
      begin
        { Counting pass: same decoding as above, nothing is stored. }
        while (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80) = 0 then
              begin
                //One character US-ASCII
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                //Count the leading 1 bits: the announced sequence length
                TempByte:=IBYTE;
                CharLen:=0;
                while (TempBYTE and $80)<>0 do
                  begin
                    TempBYTE:=(TempBYTE shl 1) and $FE;
                    inc(CharLen);
                  end;
                //The last byte of the sequence must still lie inside the input
                if SizeUInt(InputUTF8+CharLen-1)>=SourceBytes then
                  begin
                    //Insufficient bytes left to decode the sequence.
                    //Fall back to a single char.
                    CharLen:= 1;
                  end;
                //All following bytes must match the 10xxxxxx pattern
                for LookAhead := 1 to CharLen-1 do
                  begin
                    if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
                       ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
                      begin
                        //Invalid UTF-8 sequence, fall back to the valid prefix length
                        CharLen:= LookAhead;
                        break;
                      end;
                  end;
                UC:=$FFFF;
                case CharLen of
                  1:  begin
                        //Not a valid UTF-8 sequence
                        UC:=UNICODE_INVALID;
                      end;
                  2:  begin
                        //Two bytes UTF, convert it
                        UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                        UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                        if UC <= $7F then
                          begin
                            //Overlong encoding
                            UC:=UNICODE_INVALID;
                          end;
                      end;
                  3:  begin
                        //Three bytes, convert it to unicode
                        UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                        If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                          begin
                            //Overlong encoding, $FFFE/$FFFF or an encoded surrogate
                            UC:= UNICODE_INVALID;
                          end;
                      end;
                  4:  begin
                        //Four bytes, convert it to two unicode characters
                        UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                        if (UC < $10000) or (UC > $10FFFF) then
                          UC:= UNICODE_INVALID
                        else
                          { a surrogate pair needs one extra widechar }
                          inc(OutputUnicode);
                      end;
                  5,6,7:  begin
                            //Invalid UTF8 to unicode conversion,
                            //mask it as invalid UNICODE too.
                            UC:=UNICODE_INVALID;
                          end;
                end;
                if CharLen > 0 then
                  inc(OutputUnicode);
                InputUTF8:= InputUTF8 + CharLen;
              end;
          end;
        Result:=OutputUnicode+1;
      end;
  end;
  { Returns s encoded as UTF-8; an empty s gives an empty result.
    The work buffer is sized at three bytes per UTF-16 code unit and is
    trimmed to the length reported by UnicodeToUtf8 afterwards. }
  function UTF8Encode(const s : WideString) : UTF8String;
  var
    Written : SizeInt;
    Buffer : UTF8String;
  begin
    Result:='';
    if s<>'' then
      begin
        SetLength(Buffer,Length(s)*3);
        { +1: the buffer passed in also has room for the terminator }
        Written:=UnicodeToUtf8(PChar(Buffer),Length(Buffer)+1,PWideChar(s),Length(s));
        if Written>0 then
          begin
            { the reported count includes the terminator }
            SetLength(Buffer,Written-1);
            Result:=Buffer;
          end;
      end;
  end;
  const
    { Messages printed by unimplementedwidestring. }
    SNoWidestrings = 'This binary has no widestrings support compiled in.';
    SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';

  { Common handler for the placeholder widestring-manager routines below.
    When console I/O is compiled in and the program is a console program,
    both messages are written to StdErr; then error 233 is passed to
    HandleErrorFrame together with the current frame. }
  procedure unimplementedwidestring;
  begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
    if IsConsole then
      begin
        Writeln(StdErr,SNoWidestrings);
        Writeln(StdErr,SRecompileWithWidestrings);
      end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
    HandleErrorFrame(233,get_frame);
  end;
  1511. {$warnings off}
  { Placeholder case-conversion routine: it performs no conversion and only
    calls unimplementedwidestring. No result is assigned. }
  function GenericWideCase(const s : WideString) : WideString;
  begin
    unimplementedwidestring();
  end;
  { Placeholder comparison routine: it performs no comparison and only
    calls unimplementedwidestring. No result is assigned. }
  function CompareWideString(const s1, s2 : WideString) : PtrInt;
  begin
    unimplementedwidestring();
  end;
  { Placeholder text-comparison routine: it performs no comparison and only
    calls unimplementedwidestring. No result is assigned. }
  function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  begin
    unimplementedwidestring();
  end;
  1524. {$warnings on}
  1525. function DefaultCharLengthPChar(const Str: PChar): PtrInt;forward;
  1526. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;forward;
  { Resets the global widestringmanager record: every field is cleared
    first, then the default handlers are installed. The move and case
    handlers are only installed when HAS_WIDESTRINGMANAGER is not defined. }
  procedure initwidestringmanager;
  begin
    FillChar(widestringmanager,SizeOf(widestringmanager),0);
    { handlers installed in every configuration }
    widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
    widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
    widestringmanager.CompareWideStringProc:=@CompareWideString;
    widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
  {$ifndef HAS_WIDESTRINGMANAGER}
    { conversion defaults and placeholder case handlers }
    widestringmanager.Wide2AnsiMoveProc:=@DefaultWide2AnsiMove;
    widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
    widestringmanager.UpperWideStringProc:=@GenericWideCase;
    widestringmanager.LowerWideStringProc:=@GenericWideCase;
  {$endif HAS_WIDESTRINGMANAGER}
  end;