ustrings.inc 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of chars. Multiply with
  20. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  21. @ : String + Terminating #0;
  22. Punicodechar(Unicodestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  27. Type
  28. PUnicodeRec = ^TUnicodeRec;
  29. TUnicodeRec = Packed Record
  30. CodePage : TSystemCodePage;
  31. ElementSize : Word;
  32. {$ifdef CPU64}
  33. { align fields }
  34. Dummy : DWord;
  35. {$endif CPU64}
  36. Ref : SizeInt;
  37. Len : SizeInt;
  38. First : UnicodeChar;
  39. end;
  40. Const
  41. UnicodeRecLen = SizeOf(TUnicodeRec);
  42. UnicodeFirstOff = SizeOf(TUnicodeRec)-sizeof(UnicodeChar);
  43. {
  44. Default UnicodeChar <-> Char conversion is to only convert the
  45. lower 127 chars, all others are translated to '?'.
  46. These routines can be overwritten for the Current Locale
  47. }
  48. procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  49. var
  50. i : SizeInt;
  51. p : PAnsiChar;
  52. begin
  53. setlength(dest,len);
  54. p:=pointer(dest); {SetLength guarantees that dest is unique}
  55. for i:=1 to len do
  56. begin
  57. if word(source^)<256 then
  58. p^:=char(word(source^))
  59. else
  60. p^:='?';
  61. inc(source);
  62. inc(p);
  63. end;
  64. end;
  65. procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  66. var
  67. i : SizeInt;
  68. p : PUnicodeChar;
  69. begin
  70. setlength(dest,len);
  71. p:=pointer(dest); {SetLength guarantees that dest is unique}
  72. for i:=1 to len do
  73. begin
  74. p^:=unicodechar(byte(source^));
  75. inc(source);
  76. inc(p);
  77. end;
  78. end;
  79. function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  80. begin
  81. DefaultCharLengthPChar:=length(Str);
  82. end;
  83. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  84. begin
  85. if str[0]<>#0 then
  86. DefaultCodePointLength:=1
  87. else
  88. DefaultCodePointLength:=0;
  89. end;
  90. function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  91. begin
  92. { don't raise an exception here. We need this for text file handling }
  93. Result:=DefaultSystemCodePage;
  94. end;
  95. Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  96. begin
  97. manager:=widestringmanager;
  98. end;
  99. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  100. begin
  101. Old:=widestringmanager;
  102. widestringmanager:=New;
  103. end;
  104. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  105. begin
  106. widestringmanager:=New;
  107. end;
  108. Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  109. begin
  110. manager:=widestringmanager;
  111. end;
  112. Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  113. begin
  114. Old:=widestringmanager;
  115. widestringmanager:=New;
  116. end;
  117. Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  118. begin
  119. widestringmanager:=New;
  120. end;
  121. {****************************************************************************
  122. Internal functions, not in interface.
  123. ****************************************************************************}
  124. procedure UnicodeStringError;
  125. begin
  126. HandleErrorFrame(204,get_frame);
  127. end;
  128. Function NewUnicodeString(Len : SizeInt) : Pointer;
  129. {
  130. Allocate a new UnicodeString on the heap.
  131. initialize it to zero length and reference count 1.
  132. }
  133. Var
  134. P : Pointer;
  135. begin
  136. GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
  137. If P<>Nil then
  138. begin
  139. PUnicodeRec(P)^.Len:=Len; { Initial length }
  140. PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
  141. PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
  142. PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);
  143. PUnicodeRec(P)^.First:=#0; { Terminating #0 }
  144. inc(p,UnicodeFirstOff); { Points to string now }
  145. end
  146. else
  147. UnicodeStringError;
  148. NewUnicodeString:=P;
  149. end;
  150. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  151. {
  152. Decreases the ReferenceCount of a non constant unicodestring;
  153. If the reference count is zero, deallocate the string;
  154. }
  155. Var
  156. p: PUnicodeRec;
  157. Begin
  158. { Zero string }
  159. if S=Nil then
  160. exit;
  161. { check for constant strings ...}
  162. p:=PUnicodeRec(S-UnicodeFirstOff);
  163. S:=nil;
  164. if p^.Ref<0 then
  165. exit;
  166. { declocked does a MT safe dec and returns true, if the counter is 0 }
  167. if declocked(p^.Ref) then
  168. FreeMem(p);
  169. end;
  170. { alias for internal use }
  171. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  172. Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  173. Begin
  174. If S=Nil then
  175. exit;
  176. { constant string ? }
  177. If PUnicodeRec(S-UnicodeFirstOff)^.Ref<0 then
  178. exit;
  179. inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
  180. end;
  181. { alias for internal use }
  182. Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  183. procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  184. {
  185. Converts a UnicodeString to a ShortString;
  186. }
  187. Var
  188. Size : SizeInt;
  189. temp : ansistring;
  190. begin
  191. res:='';
  192. Size:=Length(S2);
  193. if Size>0 then
  194. begin
  195. If Size>high(res) then
  196. Size:=high(res);
  197. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
  198. res:=temp;
  199. end;
  200. end;
  201. Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  202. {
  203. Converts a ShortString to a UnicodeString;
  204. }
  205. Var
  206. Size : SizeInt;
  207. begin
  208. result:='';
  209. Size:=Length(S2);
  210. if Size>0 then
  211. widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  212. end;
  213. Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  214. {
  215. Converts a UnicodeString to an AnsiString
  216. }
  217. Var
  218. Size : SizeInt;
  219. {$ifndef FPC_HAS_CPSTRING}
  220. cp : TSystemCodePage;
  221. {$endif FPC_HAS_CPSTRING}
  222. begin
  223. {$ifndef FPC_HAS_CPSTRING}
  224. cp:=DefaultSystemCodePage;
  225. {$endif FPC_HAS_CPSTRING}
  226. result:='';
  227. Size:=Length(S2);
  228. if Size>0 then
  229. begin
  230. if (cp=CP_ACP) then
  231. cp:=DefaultSystemCodePage;
  232. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,Size);
  233. end;
  234. end;
  235. Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  236. {
  237. Converts an AnsiString to a UnicodeString;
  238. }
  239. Var
  240. Size : SizeInt;
  241. cp: TSystemCodePage;
  242. begin
  243. result:='';
  244. Size:=Length(S2);
  245. if Size>0 then
  246. begin
  247. cp:=StringCodePage(S2);
  248. if (cp=CP_ACP) then
  249. cp:=DefaultSystemCodePage;
  250. widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),cp,result,Size);
  251. end;
  252. end;
  253. Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  254. begin
  255. SetLength(Result,Length(S2));
  256. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  257. end;
  258. Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  259. begin
  260. SetLength(Result,Length(S2));
  261. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  262. end;
  263. Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  264. var
  265. Size : SizeInt;
  266. begin
  267. result:='';
  268. if p=nil then
  269. exit;
  270. Size := IndexWord(p^, -1, 0);
  271. Setlength(result,Size);
  272. if Size>0 then
  273. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  274. end;
  275. Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  276. var
  277. Size : SizeInt;
  278. {$ifndef FPC_HAS_CPSTRING}
  279. cp : TSystemCodePage;
  280. {$endif FPC_HAS_CPSTRING}
  281. begin
  282. {$ifndef FPC_HAS_CPSTRING}
  283. cp:=DefaultSystemCodePage;
  284. {$endif FPC_HAS_CPSTRING}
  285. result:='';
  286. if p=nil then
  287. exit;
  288. Size := IndexWord(p^, -1, 0);
  289. if Size>0 then
  290. widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
  291. end;
  292. procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  293. var
  294. Size : SizeInt;
  295. temp: ansistring;
  296. begin
  297. res:='';
  298. if p=nil then
  299. exit;
  300. Size:=IndexWord(p^, high(PtrInt), 0);
  301. if Size>0 then
  302. begin
  303. widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  304. res:=temp;
  305. end;
  306. end;
  307. { checked against the ansistring routine, 2001-05-27 (FK) }
  308. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  309. {
  310. Assigns S2 to S1 (S1:=S2), taking in account reference counts.
  311. }
  312. begin
  313. If S2<>nil then
  314. If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
  315. inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
  316. { Decrease the reference count on the old S1 }
  317. fpc_unicodestr_decr_ref (S1);
  318. s1:=s2;
  319. end;
  320. { alias for internal use }
  321. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  322. procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  323. Var
  324. Size,Location : SizeInt;
  325. same : boolean;
  326. begin
  327. { only assign if s1 or s2 is empty }
  328. if (S1='') then
  329. begin
  330. DestS:=s2;
  331. exit;
  332. end;
  333. if (S2='') then
  334. begin
  335. DestS:=s1;
  336. exit;
  337. end;
  338. Location:=Length(S1);
  339. Size:=length(S2);
  340. { Use Pointer() typecasts to prevent extra conversion code }
  341. if Pointer(DestS)=Pointer(S1) then
  342. begin
  343. same:=Pointer(S1)=Pointer(S2);
  344. SetLength(DestS,Size+Location);
  345. if same then
  346. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size)*sizeof(UnicodeChar))
  347. else
  348. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  349. end
  350. else if Pointer(DestS)=Pointer(S2) then
  351. begin
  352. SetLength(DestS,Size+Location);
  353. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  354. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  355. end
  356. else
  357. begin
  358. DestS:='';
  359. SetLength(DestS,Size+Location);
  360. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  361. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  362. end;
  363. end;
  364. procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  365. Var
  366. i : Longint;
  367. p,pc : pointer;
  368. Size,NewLen : SizeInt;
  369. lowstart : longint;
  370. destcopy : pointer;
  371. OldDestLen : SizeInt;
  372. begin
  373. if high(sarr)=0 then
  374. begin
  375. DestS:='';
  376. exit;
  377. end;
  378. destcopy:=nil;
  379. lowstart:=low(sarr);
  380. if Pointer(DestS)=Pointer(sarr[lowstart]) then
  381. inc(lowstart);
  382. { Check for another reuse, then we can't use
  383. the append optimization }
  384. for i:=lowstart to high(sarr) do
  385. begin
  386. if Pointer(DestS)=Pointer(sarr[i]) then
  387. begin
  388. { if DestS is used somewhere in the middle of the expression,
  389. we need to make sure the original string still exists after
  390. we empty/modify DestS.
  391. This trick only works with reference counted strings. Therefor
  392. this optimization is disabled for WINLIKEUNICODESTRING }
  393. destcopy:=pointer(dests);
  394. fpc_UnicodeStr_Incr_Ref(destcopy);
  395. lowstart:=low(sarr);
  396. break;
  397. end;
  398. end;
  399. { Start with empty DestS if we start with concatting
  400. the first array element }
  401. if lowstart=low(sarr) then
  402. DestS:='';
  403. OldDestLen:=length(DestS);
  404. { Calculate size of the result so we can do
  405. a single call to SetLength() }
  406. NewLen:=0;
  407. for i:=low(sarr) to high(sarr) do
  408. inc(NewLen,length(sarr[i]));
  409. SetLength(DestS,NewLen);
  410. { Concat all strings, except the string we already
  411. copied in DestS }
  412. pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
  413. for i:=lowstart to high(sarr) do
  414. begin
  415. p:=pointer(sarr[i]);
  416. if assigned(p) then
  417. begin
  418. Size:=length(unicodestring(p));
  419. Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
  420. inc(pc,size*sizeof(UnicodeChar));
  421. end;
  422. end;
  423. fpc_UnicodeStr_Decr_Ref(destcopy);
  424. end;
  425. Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  426. var
  427. w: unicodestring;
  428. begin
  429. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
  430. fpc_Char_To_UChar:=w[1];
  431. end;
  432. Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  433. {
  434. Converts a Char to a UnicodeString;
  435. }
  436. begin
  437. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,result,1);
  438. end;
  439. Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  440. {
  441. Converts a UnicodeChar to a Char;
  442. }
  443. var
  444. s: ansistring;
  445. begin
  446. widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  447. if length(s)=1 then
  448. fpc_UChar_To_Char:= s[1]
  449. else
  450. fpc_UChar_To_Char:='?';
  451. end;
  452. procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  453. {
  454. Converts a WideChar to a ShortString;
  455. }
  456. var
  457. s: ansistring;
  458. begin
  459. widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  460. res:=s;
  461. end;
  462. Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  463. {
  464. Converts a UnicodeChar to a UnicodeString;
  465. }
  466. begin
  467. Setlength (fpc_UChar_To_UnicodeStr,1);
  468. fpc_UChar_To_UnicodeStr[1]:= c;
  469. end;
  470. Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  471. {
  472. Converts a UnicodeChar to a AnsiString;
  473. }
  474. {$ifndef FPC_HAS_CPSTRING}
  475. var
  476. cp : TSystemCodePage;
  477. {$endif FPC_HAS_CPSTRING}
  478. begin
  479. {$ifndef FPC_HAS_CPSTRING}
  480. cp:=DefaultSystemCodePage;
  481. {$endif FPC_HAS_CPSTRING}
  482. if (cp=CP_ACP) then
  483. cp:=DefaultSystemCodePage;
  484. widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
  485. end;
  486. Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  487. Var
  488. L : SizeInt;
  489. begin
  490. if (not assigned(p)) or (p[0]=#0) Then
  491. begin
  492. fpc_pchar_to_unicodestr := '';
  493. exit;
  494. end;
  495. l:=IndexChar(p^,-1,#0);
  496. widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,l);
  497. end;
  498. Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  499. var
  500. i : SizeInt;
  501. begin
  502. if zerobased then
  503. begin
  504. if arr[0]=#0 Then
  505. begin
  506. fpc_chararray_to_unicodestr:='';
  507. exit;
  508. end;
  509. i:=IndexChar(arr,high(arr)+1,#0);
  510. if i=-1 then
  511. i:=high(arr)+1;
  512. end
  513. else
  514. i:=high(arr)+1;
  515. widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
  516. end;
  517. Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  518. var
  519. i : SizeInt;
  520. begin
  521. if (zerobased) then
  522. begin
  523. i:=IndexWord(arr,high(arr)+1,0);
  524. if i = -1 then
  525. i := high(arr)+1;
  526. end
  527. else
  528. i := high(arr)+1;
  529. SetLength(fpc_WideCharArray_To_UnicodeStr,i);
  530. Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,i*sizeof(WideChar));
  531. end;
  532. { due to their names, the following procedures should be in wstrings.inc,
  533. however, the compiler generates code using this functions on all platforms }
  534. procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  535. var
  536. l: longint;
  537. index: ptrint;
  538. len: byte;
  539. temp: ansistring;
  540. begin
  541. l := high(arr)+1;
  542. if l>=high(res)+1 then
  543. l:=high(res)
  544. else if l<0 then
  545. l:=0;
  546. if zerobased then
  547. begin
  548. index:=IndexWord(arr[0],l,0);
  549. if index<0 then
  550. len:=l
  551. else
  552. len:=index;
  553. end
  554. else
  555. len:=l;
  556. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
  557. res:=temp;
  558. end;
  559. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  560. var
  561. i : SizeInt;
  562. {$ifndef FPC_HAS_CPSTRING}
  563. cp : TSystemCodePage;
  564. {$endif FPC_HAS_CPSTRING}
  565. begin
  566. {$ifndef FPC_HAS_CPSTRING}
  567. cp:=DefaultSystemCodePage;
  568. {$endif FPC_HAS_CPSTRING}
  569. if (zerobased) then
  570. begin
  571. i:=IndexWord(arr,high(arr)+1,0);
  572. if i = -1 then
  573. i := high(arr)+1;
  574. end
  575. else
  576. i := high(arr)+1;
  577. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,cp,i);
  578. end;
  579. Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  580. var
  581. i : SizeInt;
  582. begin
  583. if (zerobased) then
  584. begin
  585. i:=IndexWord(arr,high(arr)+1,0);
  586. if i = -1 then
  587. i := high(arr)+1;
  588. end
  589. else
  590. i := high(arr)+1;
  591. SetLength(fpc_WideCharArray_To_WideStr,i);
  592. Move(arr[0], Pointer(fpc_WideCharArray_To_WideStr)^,i*sizeof(WideChar));
  593. end;
  594. procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  595. var
  596. len: SizeInt;
  597. temp: ansistring;
  598. begin
  599. len := length(src);
  600. { make sure we don't dereference src if it can be nil (JM) }
  601. if len > 0 then
  602. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
  603. len := length(temp);
  604. if len > length(res) then
  605. len := length(res);
  606. {$push}
  607. {$r-}
  608. move(temp[1],res[0],len);
  609. fillchar(res[len],length(res)-len,0);
  610. {$pop}
  611. end;
  612. procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  613. var
  614. len: SizeInt;
  615. temp: widestring;
  616. begin
  617. len := length(src);
  618. { make sure we don't dereference src if it can be nil (JM) }
  619. if len > 0 then
  620. widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),temp,len);
  621. len := length(temp);
  622. if len > length(res) then
  623. len := length(res);
  624. {$push}
  625. {$r-}
  626. move(temp[1],res[0],len*sizeof(widechar));
  627. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  628. {$pop}
  629. end;
  630. procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  631. var
  632. len: longint;
  633. temp : widestring;
  634. begin
  635. len := length(src);
  636. { make sure we don't access char 1 if length is 0 (JM) }
  637. if len > 0 then
  638. widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  639. len := length(temp);
  640. if len > length(res) then
  641. len := length(res);
  642. {$push}
  643. {$r-}
  644. move(temp[1],res[0],len*sizeof(widechar));
  645. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  646. {$pop}
  647. end;
  648. procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  649. var
  650. len: SizeInt;
  651. begin
  652. len := length(src);
  653. if len > length(res) then
  654. len := length(res);
  655. {$push}
  656. {$r-}
  657. { make sure we don't try to access element 1 of the widestring if it's nil }
  658. if len > 0 then
  659. move(src[1],res[0],len*SizeOf(WideChar));
  660. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  661. {$pop}
  662. end;
  663. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  664. {
  665. Compares 2 UnicodeStrings;
  666. The result is
  667. <0 if S1<S2
  668. 0 if S1=S2
  669. >0 if S1>S2
  670. }
  671. Var
  672. MaxI,Temp : SizeInt;
  673. begin
  674. if pointer(S1)=pointer(S2) then
  675. begin
  676. fpc_UnicodeStr_Compare:=0;
  677. exit;
  678. end;
  679. Maxi:=Length(S1);
  680. temp:=Length(S2);
  681. If MaxI>Temp then
  682. MaxI:=Temp;
  683. Temp:=CompareWord(S1[1],S2[1],MaxI);
  684. if temp=0 then
  685. temp:=Length(S1)-Length(S2);
  686. fpc_UnicodeStr_Compare:=Temp;
  687. end;
  688. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  689. {
  690. Compares 2 UnicodeStrings for equality only;
  691. The result is
  692. 0 if S1=S2
  693. <>0 if S1<>S2
  694. }
  695. Var
  696. MaxI : SizeInt;
  697. begin
  698. if pointer(S1)=pointer(S2) then
  699. exit(0);
  700. Maxi:=Length(S1);
  701. If MaxI<>Length(S2) then
  702. exit(-1)
  703. else
  704. exit(CompareWord(S1[1],S2[1],MaxI));
  705. end;
  706. {$ifdef VER2_4}
  707. // obsolete but needed for bootstrapping with 2.4
  708. Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  709. begin
  710. if p=nil then
  711. HandleErrorFrame(201,get_frame);
  712. end;
  713. Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  714. begin
  715. if (index>len) or (Index<1) then
  716. HandleErrorFrame(201,get_frame);
  717. end;
  718. {$else VER2_4}
  719. Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  720. begin
  721. if (p=nil) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) or (Index<1) then
  722. HandleErrorFrame(201,get_frame);
  723. end;
  724. {$endif VER2_4}
  725. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  726. {
  727. Sets The length of string S to L.
  728. Makes sure S is unique, and contains enough room.
  729. }
  730. Var
  731. Temp : Pointer;
  732. movelen: SizeInt;
  733. lens, lena : SizeUInt;
  734. begin
  735. if (l>0) then
  736. begin
  737. if Pointer(S)=nil then
  738. begin
  739. { Need a complete new string...}
  740. Pointer(s):=NewUnicodeString(l);
  741. end
  742. else
  743. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  744. begin
  745. Temp:=Pointer(s)-UnicodeFirstOff;
  746. lens:=MemSize(Temp);
  747. lena:=SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen);
  748. if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
  749. begin
  750. reallocmem(Temp, lena);
  751. Pointer(S):=Temp+UnicodeFirstOff;
  752. end;
  753. end
  754. else
  755. begin
  756. { Reallocation is needed... }
  757. Temp:=NewUnicodeString(L);
  758. if Length(S)>0 then
  759. begin
  760. if l < succ(length(s)) then
  761. movelen := l
  762. { also move terminating null }
  763. else
  764. movelen := succ(length(s));
  765. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  766. end;
  767. fpc_unicodestr_decr_ref(Pointer(S));
  768. Pointer(S):=Temp;
  769. end;
  770. { Force nil termination in case it gets shorter }
  771. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  772. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  773. end
  774. else { length=0, deallocate the string }
  775. fpc_unicodestr_decr_ref (Pointer(S));
  776. end;
  777. {*****************************************************************************
  778. Public functions, In interface.
  779. *****************************************************************************}
  780. function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  781. begin
  782. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  783. end;
  784. function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  785. var
  786. temp:unicodestring;
  787. begin
  788. widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  789. if Length(temp)<DestSize then
  790. move(temp[1],Dest^,Length(temp)*SizeOf(UnicodeChar))
  791. else
  792. move(temp[1],Dest^,(DestSize-1)*SizeOf(UnicodeChar));
  793. Dest[DestSize-1]:=#0;
  794. result:=Dest;
  795. end;
  796. function WideCharToString(S : PWideChar) : UnicodeString;
  797. begin
  798. result:=WideCharLenToString(s,Length(WideString(s)));
  799. end;
  800. function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  801. var
  802. temp:widestring;
  803. begin
  804. widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  805. if Length(temp)<DestSize then
  806. move(temp[1],Dest^,Length(temp)*SizeOf(WideChar))
  807. else
  808. move(temp[1],Dest^,(DestSize-1)*SizeOf(WideChar));
  809. Dest[DestSize-1]:=#0;
  810. result:=Dest;
  811. end;
  812. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  813. begin
  814. SetLength(result,Len);
  815. Move(S^,Pointer(Result)^,Len*2);
  816. end;
  817. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  818. begin
  819. Dest:=UnicodeCharLenToString(Src,Len);
  820. end;
  821. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  822. begin
  823. Dest:=UnicodeCharLenToString(Src,Len);
  824. end;
  825. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  826. begin
  827. Dest:=UnicodeCharToString(S);
  828. end;
  829. function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  830. begin
  831. SetLength(result,Len);
  832. Move(S^,Pointer(Result)^,Len*2);
  833. end;
  834. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  835. begin
  836. Dest:=WideCharLenToString(Src,Len);
  837. end;
  838. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  839. begin
  840. Dest:=WideCharLenToString(Src,Len);
  841. end;
  842. procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  843. begin
  844. Dest:=WideCharToString(S);
  845. end;
  846. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  847. begin
  848. Dest:=WideCharToString(S);
  849. end;
  850. Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  851. {
  852. Make sure reference count of S is 1,
  853. using copy-on-write semantics.
  854. }
  855. Var
  856. SNew : Pointer;
  857. L : SizeInt;
  858. begin
  859. pointer(result) := pointer(s);
  860. If Pointer(S)=Nil then
  861. exit;
  862. if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
  863. begin
  864. L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
  865. SNew:=NewUnicodeString (L);
  866. Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
  867. PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L;
  868. fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
  869. pointer(S):=SNew;
  870. pointer(result):=SNew;
  871. end;
  872. end;
  873. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  874. var
  875. ResultAddress : Pointer;
  876. begin
  877. ResultAddress:=Nil;
  878. dec(index);
  879. if Index < 0 then
  880. Index := 0;
  881. { Check Size. Accounts for Zero-length S, the double check is needed because
  882. Size can be maxint and will get <0 when adding index }
  883. if (Size>Length(S)) or
  884. (Index+Size>Length(S)) then
  885. Size:=Length(S)-Index;
  886. If Size>0 then
  887. begin
  888. ResultAddress:=NewUnicodeString(Size);
  889. Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
  890. PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size;
  891. PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
  892. end;
  893. fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
  894. Pointer(fpc_unicodestr_Copy):=ResultAddress;
  895. end;
  896. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  897. var
  898. i,MaxLen : SizeInt;
  899. pc : punicodechar;
  900. begin
  901. Pos:=0;
  902. if Length(SubStr)>0 then
  903. begin
  904. MaxLen:=Length(source)-Length(SubStr);
  905. i:=0;
  906. pc:=@source[1];
  907. while (i<=MaxLen) do
  908. begin
  909. inc(i);
  910. if (SubStr[1]=pc^) and
  911. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  912. begin
  913. Pos:=i;
  914. exit;
  915. end;
  916. inc(pc);
  917. end;
  918. end;
  919. end;
  920. { Faster version for a unicodechar alone }
  921. Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  922. var
  923. i: SizeInt;
  924. pc : punicodechar;
  925. begin
  926. pc:=@s[1];
  927. for i:=1 to length(s) do
  928. begin
  929. if pc^=c then
  930. begin
  931. pos:=i;
  932. exit;
  933. end;
  934. inc(pc);
  935. end;
  936. pos:=0;
  937. end;
  938. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  939. block, which is significant bloat without any sensible speed improvement. }
  940. Function Pos (const c : RawByteString; Const s : UnicodeString) : SizeInt;
  941. begin
  942. result:=Pos(UnicodeString(c),s);
  943. end;
  944. Function Pos (const c : ShortString; Const s : UnicodeString) : SizeInt;
  945. begin
  946. result:=Pos(UnicodeString(c),s);
  947. end;
  948. Function Pos (const c : UnicodeString; Const s : RawByteString) : SizeInt;
  949. begin
  950. result:=Pos(c,UnicodeString(s));
  951. end;
  952. { Faster version for a char alone. Must be implemented because }
  953. { pos(c: char; const s: shortstring) also exists, so otherwise }
  954. { using pos(char,pchar) will always call the shortstring version }
  955. { (exact match for first argument), also with $h+ (JM) }
  956. Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  957. var
  958. i: SizeInt;
  959. wc : unicodechar;
  960. pc : punicodechar;
  961. begin
  962. wc:=c;
  963. pc:=@s[1];
  964. for i:=1 to length(s) do
  965. begin
  966. if pc^=wc then
  967. begin
  968. pos:=i;
  969. exit;
  970. end;
  971. inc(pc);
  972. end;
  973. pos:=0;
  974. end;
  975. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  976. Var
  977. LS : SizeInt;
  978. begin
  979. LS:=Length(S);
  980. if (Index>LS) or (Index<=0) or (Size<=0) then
  981. exit;
  982. UniqueString (S);
  983. { (Size+Index) will overflow if Size=MaxInt. }
  984. if Size>LS-Index then
  985. Size:=LS-Index+1;
  986. if Size<=LS-Index then
  987. begin
  988. Dec(Index);
  989. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  990. end;
  991. Setlength(s,LS-Size);
  992. end;
  993. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  994. var
  995. Temp : UnicodeString;
  996. LS : SizeInt;
  997. begin
  998. If Length(Source)=0 then
  999. exit;
  1000. if index <= 0 then
  1001. index := 1;
  1002. Ls:=Length(S);
  1003. if index > LS then
  1004. index := LS+1;
  1005. Dec(Index);
  1006. SetLength(Temp,Length(Source)+LS);
  1007. If Index>0 then
  1008. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  1009. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  1010. If (LS-Index)>0 then
  1011. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  1012. S:=Temp;
  1013. end;
  1014. Function UpCase(c:UnicodeChar):UnicodeChar;
  1015. var
  1016. s : UnicodeString;
  1017. begin
  1018. s:=c;
  1019. result:=widestringmanager.UpperUnicodeStringProc(s)[1];
  1020. end;
  1021. function UpCase(const s : UnicodeString) : UnicodeString;
  1022. begin
  1023. result:=widestringmanager.UpperUnicodeStringProc(s);
  1024. end;
  1025. Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  1026. begin
  1027. SetLength(S,Len);
  1028. If (Buf<>Nil) and (Len>0) then
  1029. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  1030. end;
  1031. Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  1032. begin
  1033. If (Buf<>Nil) and (Len>0) then
  1034. widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len)
  1035. else
  1036. SetLength(S,Len);
  1037. end;
  1038. {$ifndef FPUNONE}
  1039. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  1040. Var
  1041. SS : String;
  1042. begin
  1043. fpc_Val_Real_UnicodeStr := 0;
  1044. if length(S) > 255 then
  1045. code := 256
  1046. else
  1047. begin
  1048. SS := S;
  1049. Val(SS,fpc_Val_Real_UnicodeStr,code);
  1050. end;
  1051. end;
  1052. {$endif}
  1053. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  1054. var ss:shortstring;
  1055. begin
  1056. if length(s)>255 then
  1057. code:=256
  1058. else
  1059. begin
  1060. ss:=s;
  1061. val(ss,fpc_val_enum_unicodestr,code);
  1062. end;
  1063. end;
  1064. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  1065. Var
  1066. SS : String;
  1067. begin
  1068. if length(S) > 255 then
  1069. begin
  1070. fpc_Val_Currency_UnicodeStr:=0;
  1071. code := 256;
  1072. end
  1073. else
  1074. begin
  1075. SS := S;
  1076. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  1077. end;
  1078. end;
  1079. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  1080. Var
  1081. SS : ShortString;
  1082. begin
  1083. fpc_Val_UInt_UnicodeStr := 0;
  1084. if length(S) > 255 then
  1085. code := 256
  1086. else
  1087. begin
  1088. SS := S;
  1089. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  1090. end;
  1091. end;
  1092. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  1093. Var
  1094. SS : ShortString;
  1095. begin
  1096. fpc_Val_SInt_UnicodeStr:=0;
  1097. if length(S)>255 then
  1098. code:=256
  1099. else
  1100. begin
  1101. SS := S;
  1102. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  1103. end;
  1104. end;
  1105. {$ifndef CPU64}
  1106. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1107. Var
  1108. SS : ShortString;
  1109. begin
  1110. fpc_Val_qword_UnicodeStr:=0;
  1111. if length(S)>255 then
  1112. code:=256
  1113. else
  1114. begin
  1115. SS := S;
  1116. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1117. end;
  1118. end;
  1119. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1120. Var
  1121. SS : ShortString;
  1122. begin
  1123. fpc_Val_int64_UnicodeStr:=0;
  1124. if length(S)>255 then
  1125. code:=256
  1126. else
  1127. begin
  1128. SS := S;
  1129. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1130. end;
  1131. end;
  1132. {$endif CPU64}
  1133. {$ifndef FPUNONE}
  1134. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1135. var
  1136. ss : shortstring;
  1137. begin
  1138. str_real(len,fr,d,treal_type(rt),ss);
  1139. s:=ss;
  1140. end;
  1141. {$endif}
  1142. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1143. var ss:shortstring;
  1144. begin
  1145. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1146. s:=ss;
  1147. end;
  1148. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1149. var ss:shortstring;
  1150. begin
  1151. fpc_shortstr_bool(b,len,ss);
  1152. s:=ss;
  1153. end;
  1154. {$ifdef FPC_HAS_STR_CURRENCY}
  1155. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1156. var
  1157. ss : shortstring;
  1158. begin
  1159. str(c:len:fr,ss);
  1160. s:=ss;
  1161. end;
  1162. {$endif FPC_HAS_STR_CURRENCY}
  1163. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1164. Var
  1165. SS : ShortString;
  1166. begin
  1167. Str (v:Len,SS);
  1168. S:=SS;
  1169. end;
  1170. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1171. Var
  1172. SS : ShortString;
  1173. begin
  1174. str(v:Len,SS);
  1175. S:=SS;
  1176. end;
  1177. {$ifndef CPU64}
  1178. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1179. Var
  1180. SS : ShortString;
  1181. begin
  1182. Str (v:Len,SS);
  1183. S:=SS;
  1184. end;
  1185. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1186. Var
  1187. SS : ShortString;
  1188. begin
  1189. str(v:Len,SS);
  1190. S:=SS;
  1191. end;
  1192. {$endif CPU64}
  1193. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1194. begin
  1195. if assigned(Source) then
  1196. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  1197. else
  1198. Result:=0;
  1199. end;
  1200. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1201. var
  1202. i,j : SizeUInt;
  1203. lw : longword;
  1204. begin
  1205. result:=0;
  1206. if source=nil then
  1207. exit;
  1208. i:=0;
  1209. j:=0;
  1210. if assigned(Dest) then
  1211. begin
  1212. while (i<SourceChars) and (j<MaxDestBytes) do
  1213. begin
  1214. lw:=ord(Source[i]);
  1215. case lw of
  1216. 0..$7f:
  1217. begin
  1218. Dest[j]:=char(lw);
  1219. inc(j);
  1220. end;
  1221. $80..$7ff:
  1222. begin
  1223. if j+1>=MaxDestBytes then
  1224. break;
  1225. Dest[j]:=char($c0 or (lw shr 6));
  1226. Dest[j+1]:=char($80 or (lw and $3f));
  1227. inc(j,2);
  1228. end;
  1229. $800..$d7ff,$e000..$ffff:
  1230. begin
  1231. if j+2>=MaxDestBytes then
  1232. break;
  1233. Dest[j]:=char($e0 or (lw shr 12));
  1234. Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
  1235. Dest[j+2]:=char($80 or (lw and $3f));
  1236. inc(j,3);
  1237. end;
  1238. $d800..$dbff:
  1239. {High Surrogates}
  1240. begin
  1241. if j+3>=MaxDestBytes then
  1242. break;
  1243. if (i+1<sourcechars) and
  1244. (word(Source[i+1]) >= $dc00) and
  1245. (word(Source[i+1]) <= $dfff) then
  1246. begin
  1247. { $d7c0 is ($d800 - ($10000 shr 10)) }
  1248. lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
  1249. Dest[j]:=char($f0 or (lw shr 18));
  1250. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1251. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1252. Dest[j+3]:=char($80 or (lw and $3f));
  1253. inc(j,4);
  1254. inc(i);
  1255. end;
  1256. end;
  1257. end;
  1258. inc(i);
  1259. end;
  1260. if j>SizeUInt(MaxDestBytes-1) then
  1261. j:=MaxDestBytes-1;
  1262. Dest[j]:=#0;
  1263. end
  1264. else
  1265. begin
  1266. while i<SourceChars do
  1267. begin
  1268. case word(Source[i]) of
  1269. $0..$7f:
  1270. inc(j);
  1271. $80..$7ff:
  1272. inc(j,2);
  1273. $800..$d7ff,$e000..$ffff:
  1274. inc(j,3);
  1275. $d800..$dbff:
  1276. begin
  1277. if (i+1<sourcechars) and
  1278. (word(Source[i+1]) >= $dc00) and
  1279. (word(Source[i+1]) <= $dfff) then
  1280. begin
  1281. inc(j,4);
  1282. inc(i);
  1283. end;
  1284. end;
  1285. end;
  1286. inc(i);
  1287. end;
  1288. end;
  1289. result:=j+1;
  1290. end;
  1291. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1292. begin
  1293. if assigned(Source) then
  1294. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1295. else
  1296. Result:=0;
  1297. end;
  1298. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1299. const
  1300. UNICODE_INVALID=63;
  1301. var
  1302. InputUTF8: SizeUInt;
  1303. IBYTE: BYTE;
  1304. OutputUnicode: SizeUInt;
  1305. PRECHAR: SizeUInt;
  1306. TempBYTE: BYTE;
  1307. CharLen: SizeUint;
  1308. LookAhead: SizeUInt;
  1309. UC: SizeUInt;
  1310. begin
  1311. if not assigned(Source) then
  1312. begin
  1313. result:=0;
  1314. exit;
  1315. end;
  1316. result:=SizeUInt(-1);
  1317. InputUTF8:=0;
  1318. OutputUnicode:=0;
  1319. PreChar:=0;
  1320. if Assigned(Dest) Then
  1321. begin
  1322. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1323. begin
  1324. IBYTE:=byte(Source[InputUTF8]);
  1325. if (IBYTE and $80) = 0 then
  1326. begin
  1327. //One character US-ASCII, convert it to unicode
  1328. if IBYTE = 10 then
  1329. begin
  1330. If (PreChar<>13) and FALSE then
  1331. begin
  1332. //Expand to crlf, conform UTF-8.
  1333. //This procedure will break the memory alocation by
  1334. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1335. if OutputUnicode+1<MaxDestChars then
  1336. begin
  1337. Dest[OutputUnicode]:=WideChar(13);
  1338. inc(OutputUnicode);
  1339. Dest[OutputUnicode]:=WideChar(10);
  1340. inc(OutputUnicode);
  1341. PreChar:=10;
  1342. end
  1343. else
  1344. begin
  1345. Dest[OutputUnicode]:=WideChar(13);
  1346. inc(OutputUnicode);
  1347. end;
  1348. end
  1349. else
  1350. begin
  1351. Dest[OutputUnicode]:=WideChar(IBYTE);
  1352. inc(OutputUnicode);
  1353. PreChar:=IBYTE;
  1354. end;
  1355. end
  1356. else
  1357. begin
  1358. Dest[OutputUnicode]:=WideChar(IBYTE);
  1359. inc(OutputUnicode);
  1360. PreChar:=IBYTE;
  1361. end;
  1362. inc(InputUTF8);
  1363. end
  1364. else
  1365. begin
  1366. TempByte:=IBYTE;
  1367. CharLen:=0;
  1368. while (TempBYTE and $80)<>0 do
  1369. begin
  1370. TempBYTE:=(TempBYTE shl 1) and $FE;
  1371. inc(CharLen);
  1372. end;
  1373. //Test for the "CharLen" conforms UTF-8 string
  1374. //This means the 10xxxxxx pattern.
  1375. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1376. begin
  1377. //Insuficient chars in string to decode
  1378. //UTF-8 array. Fallback to single char.
  1379. CharLen:= 1;
  1380. end;
  1381. for LookAhead := 1 to CharLen-1 do
  1382. begin
  1383. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1384. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1385. begin
  1386. //Invalid UTF-8 sequence, fallback.
  1387. CharLen:= LookAhead;
  1388. break;
  1389. end;
  1390. end;
  1391. UC:=$FFFF;
  1392. case CharLen of
  1393. 1: begin
  1394. //Not valid UTF-8 sequence
  1395. UC:=UNICODE_INVALID;
  1396. end;
  1397. 2: begin
  1398. //Two bytes UTF, convert it
  1399. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1400. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1401. if UC <= $7F then
  1402. begin
  1403. //Invalid UTF sequence.
  1404. UC:=UNICODE_INVALID;
  1405. end;
  1406. end;
  1407. 3: begin
  1408. //Three bytes, convert it to unicode
  1409. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1410. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1411. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1412. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1413. begin
  1414. //Invalid UTF-8 sequence
  1415. UC:= UNICODE_INVALID;
  1416. End;
  1417. end;
  1418. 4: begin
  1419. //Four bytes, convert it to two unicode characters
  1420. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1421. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1422. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1423. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1424. if (UC < $10000) or (UC > $10FFFF) then
  1425. begin
  1426. UC:= UNICODE_INVALID;
  1427. end
  1428. else
  1429. begin
  1430. { only store pair if room }
  1431. dec(UC,$10000);
  1432. if (OutputUnicode<MaxDestChars-1) then
  1433. begin
  1434. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1435. inc(OutputUnicode);
  1436. UC:=(UC and $3ff) + $DC00;
  1437. end
  1438. else
  1439. begin
  1440. InputUTF8:= InputUTF8 + CharLen;
  1441. { don't store anything }
  1442. CharLen:=0;
  1443. end;
  1444. end;
  1445. end;
  1446. 5,6,7: begin
  1447. //Invalid UTF8 to unicode conversion,
  1448. //mask it as invalid UNICODE too.
  1449. UC:=UNICODE_INVALID;
  1450. end;
  1451. end;
  1452. if CharLen > 0 then
  1453. begin
  1454. PreChar:=UC;
  1455. Dest[OutputUnicode]:=WideChar(UC);
  1456. inc(OutputUnicode);
  1457. end;
  1458. InputUTF8:= InputUTF8 + CharLen;
  1459. end;
  1460. end;
  1461. Result:=OutputUnicode+1;
  1462. end
  1463. else
  1464. begin
  1465. while (InputUTF8<SourceBytes) do
  1466. begin
  1467. IBYTE:=byte(Source[InputUTF8]);
  1468. if (IBYTE and $80) = 0 then
  1469. begin
  1470. //One character US-ASCII, convert it to unicode
  1471. if IBYTE = 10 then
  1472. begin
  1473. if (PreChar<>13) and FALSE then
  1474. begin
  1475. //Expand to crlf, conform UTF-8.
  1476. //This procedure will break the memory alocation by
  1477. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1478. inc(OutputUnicode,2);
  1479. PreChar:=10;
  1480. end
  1481. else
  1482. begin
  1483. inc(OutputUnicode);
  1484. PreChar:=IBYTE;
  1485. end;
  1486. end
  1487. else
  1488. begin
  1489. inc(OutputUnicode);
  1490. PreChar:=IBYTE;
  1491. end;
  1492. inc(InputUTF8);
  1493. end
  1494. else
  1495. begin
  1496. TempByte:=IBYTE;
  1497. CharLen:=0;
  1498. while (TempBYTE and $80)<>0 do
  1499. begin
  1500. TempBYTE:=(TempBYTE shl 1) and $FE;
  1501. inc(CharLen);
  1502. end;
  1503. //Test for the "CharLen" conforms UTF-8 string
  1504. //This means the 10xxxxxx pattern.
  1505. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1506. begin
  1507. //Insuficient chars in string to decode
  1508. //UTF-8 array. Fallback to single char.
  1509. CharLen:= 1;
  1510. end;
  1511. for LookAhead := 1 to CharLen-1 do
  1512. begin
  1513. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1514. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1515. begin
  1516. //Invalid UTF-8 sequence, fallback.
  1517. CharLen:= LookAhead;
  1518. break;
  1519. end;
  1520. end;
  1521. UC:=$FFFF;
  1522. case CharLen of
  1523. 1: begin
  1524. //Not valid UTF-8 sequence
  1525. UC:=UNICODE_INVALID;
  1526. end;
  1527. 2: begin
  1528. //Two bytes UTF, convert it
  1529. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1530. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1531. if UC <= $7F then
  1532. begin
  1533. //Invalid UTF sequence.
  1534. UC:=UNICODE_INVALID;
  1535. end;
  1536. end;
  1537. 3: begin
  1538. //Three bytes, convert it to unicode
  1539. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1540. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1541. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1542. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1543. begin
  1544. //Invalid UTF-8 sequence
  1545. UC:= UNICODE_INVALID;
  1546. end;
  1547. end;
  1548. 4: begin
  1549. //Four bytes, convert it to two unicode characters
  1550. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1551. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1552. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1553. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1554. if (UC < $10000) or (UC > $10FFFF) then
  1555. UC:= UNICODE_INVALID
  1556. else
  1557. { extra character character }
  1558. inc(OutputUnicode);
  1559. end;
  1560. 5,6,7: begin
  1561. //Invalid UTF8 to unicode conversion,
  1562. //mask it as invalid UNICODE too.
  1563. UC:=UNICODE_INVALID;
  1564. end;
  1565. end;
  1566. if CharLen > 0 then
  1567. begin
  1568. PreChar:=UC;
  1569. inc(OutputUnicode);
  1570. end;
  1571. InputUTF8:= InputUTF8 + CharLen;
  1572. end;
  1573. end;
  1574. Result:=OutputUnicode+1;
  1575. end;
  1576. end;
  1577. function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  1578. begin
  1579. Result:=UTF8Encode(UnicodeString(s));
  1580. end;
  1581. function UTF8Encode(const s : UnicodeString) : RawByteString;
  1582. var
  1583. i : SizeInt;
  1584. hs : UTF8String;
  1585. begin
  1586. result:='';
  1587. if s='' then
  1588. exit;
  1589. SetLength(hs,length(s)*3);
  1590. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  1591. if i>0 then
  1592. begin
  1593. SetLength(hs,i-1);
  1594. result:=hs;
  1595. end;
  1596. end;
  1597. function UTF8Decode(const s : RawByteString): UnicodeString;
  1598. var
  1599. i : SizeInt;
  1600. hs : UnicodeString;
  1601. begin
  1602. result:='';
  1603. if s='' then
  1604. exit;
  1605. SetLength(hs,length(s));
  1606. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  1607. if i>0 then
  1608. begin
  1609. SetLength(hs,i-1);
  1610. result:=hs;
  1611. end;
  1612. end;
  1613. function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1614. begin
  1615. Result:=Utf8Encode(s);
  1616. end;
  1617. function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1618. begin
  1619. Result:=Utf8Decode(s);
  1620. end;
  1621. procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
  1622. var
  1623. i, reslen: sizeint;
  1624. w: longint;
  1625. begin
  1626. reslen:=0;
  1627. i:=0;
  1628. { calculate required length }
  1629. while (i<len) do
  1630. begin
  1631. if (p[i]<=#$d7ff) or (p[i]>=#$e000) then
  1632. inc(i)
  1633. else if (p[i]<=#$dbff) and
  1634. (i+1<len) and
  1635. (p[i+1]>=#$dc00) and
  1636. (p[i+1]<=#$dfff) then
  1637. inc(i,2)
  1638. else
  1639. inc(i);
  1640. inc(reslen);
  1641. end;
  1642. SetLength(res,reslen+1); { +1 for null termination }
  1643. reslen:=0;
  1644. i:=0;
  1645. { do conversion }
  1646. while (i<len) do
  1647. begin
  1648. w:=ord(p[i]);
  1649. if (w<=$d7ff) or (w>=$e000) then
  1650. res[reslen]:=w
  1651. else if (w<=$dbff) and
  1652. (i+1<len) and
  1653. (p[i+1]>=#$dc00) and
  1654. (p[i+1]<=#$dfff) then
  1655. begin
  1656. res[reslen]:=(UCS4Char(w-$d7c0) shl 10)+(UCS4Char(p[i+1]) xor $dc00);
  1657. inc(i);
  1658. end
  1659. else { invalid surrogate pair }
  1660. res[reslen]:=w;
  1661. inc(i);
  1662. inc(reslen);
  1663. end;
  1664. res[reslen]:=0;
  1665. end;
  1666. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  1667. begin
  1668. UCS4Encode(PWideChar(s),Length(s),result);
  1669. end;
  1670. function WideStringToUCS4String(const s : WideString) : UCS4String;
  1671. begin
  1672. UCS4Encode(PWideChar(s),Length(s),result);
  1673. end;
  1674. { dest should point to previously allocated wide/unicodestring }
  1675. procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
  1676. var
  1677. i: sizeint;
  1678. nc: UCS4Char;
  1679. begin
  1680. for i:=0 to length(s)-2 do { -2 because s contains explicit terminating #0 }
  1681. begin
  1682. nc:=s[i];
  1683. if (nc<$ffff) then
  1684. dest^:=widechar(nc)
  1685. else if (dword(nc)<=$10ffff) then
  1686. begin
  1687. dest^:=widechar(nc shr 10 + $d7c0);
  1688. { subtracting $10000 doesn't change low 10 bits }
  1689. dest[1]:=widechar(nc and $3ff + $dc00);
  1690. inc(dest);
  1691. end
  1692. else { invalid code point }
  1693. dest^:='?';
  1694. inc(dest);
  1695. end;
  1696. end;
  1697. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  1698. var
  1699. i : SizeInt;
  1700. reslen : SizeInt;
  1701. begin
  1702. reslen:=0;
  1703. for i:=0 to length(s)-2 do { skip terminating #0 }
  1704. Inc(reslen,1+ord((s[i]>$ffff) and (s[i]<=$10ffff)));
  1705. SetLength(result,reslen);
  1706. UCS4Decode(s,pointer(result));
  1707. end;
  1708. function UCS4StringToWideString(const s : UCS4String) : WideString;
  1709. var
  1710. i : SizeInt;
  1711. reslen : SizeInt;
  1712. begin
  1713. reslen:=0;
  1714. for i:=0 to length(s)-2 do { skip terminating #0 }
  1715. Inc(reslen,1+ord((s[i]>$ffff) and (s[i]<=$10ffff)));
  1716. SetLength(result,reslen);
  1717. UCS4Decode(s,pointer(result));
  1718. end;
  1719. const
  1720. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  1721. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  1722. procedure unimplementedunicodestring;
  1723. begin
  1724. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  1725. If IsConsole then
  1726. begin
  1727. Writeln(StdErr,SNoUnicodestrings);
  1728. Writeln(StdErr,SRecompileWithUnicodestrings);
  1729. end;
  1730. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  1731. HandleErrorFrame(233,get_frame);
  1732. end;
  1733. function StringElementSize(const S: UnicodeString): Word; overload;
  1734. begin
  1735. if assigned(Pointer(S)) then
  1736. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  1737. else
  1738. Result:=SizeOf(UnicodeChar);
  1739. end;
  1740. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  1741. begin
  1742. if assigned(Pointer(S)) then
  1743. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  1744. else
  1745. Result:=0;
  1746. end;
  1747. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  1748. begin
  1749. {$ifdef FPC_HAS_CPSTRING}
  1750. if assigned(Pointer(S)) then
  1751. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  1752. else
  1753. {$endif FPC_HAS_CPSTRING}
  1754. Result:=DefaultUnicodeCodePage;
  1755. end;
  1756. {$warnings off}
  1757. function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
  1758. begin
  1759. unimplementedunicodestring;
  1760. end;
  1761. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  1762. begin
  1763. unimplementedunicodestring;
  1764. end;
  1765. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  1766. begin
  1767. unimplementedunicodestring;
  1768. end;
  1769. {$warnings on}
  1770. procedure initunicodestringmanager;
  1771. begin
  1772. {$ifndef HAS_WIDESTRINGMANAGER}
  1773. widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  1774. widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  1775. widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
  1776. widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  1777. {$endif HAS_WIDESTRINGMANAGER}
  1778. widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
  1779. widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  1780. {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  1781. {$ifndef HAS_WIDESTRINGMANAGER}
  1782. widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
  1783. widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  1784. widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
  1785. widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  1786. {$endif HAS_WIDESTRINGMANAGER}
  1787. widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
  1788. widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
  1789. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  1790. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  1791. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  1792. widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
  1793. end;