ustrings.inc 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of chars. Multiply with
  20. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  21. @ : String + Terminating #0;
  22. Punicodechar(Unicodestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  { Heap layout of a UnicodeString. A UnicodeString variable points at the
    First field, so the header is reached by subtracting UnicodeFirstOff
    from the string pointer. }
  27. Type
  28. PUnicodeRec = ^TUnicodeRec;
  29. TUnicodeRec = Packed Record
  { code page tag; NewUnicodeString stores DefaultUnicodeCodePage here }
  30. CodePage : TSystemCodePage;
  { size of one character; NewUnicodeString stores SizeOf(UnicodeChar) here }
  31. ElementSize : Word;
  32. {$ifdef CPU64}
  33. { align fields }
  34. Dummy : DWord;
  35. {$endif CPU64}
  { reference count; the ref-counting helpers leave strings with Ref<0 untouched (constants) }
  36. Ref : SizeInt;
  { length in characters, not bytes }
  37. Len : SizeInt;
  { first character of the string data; the remaining characters follow the record }
  38. First : UnicodeChar;
  39. end;
  40. Const
  { size of the record including the First character }
  41. UnicodeRecLen = SizeOf(TUnicodeRec);
  { byte offset of First inside the packed record (First is the last field) }
  42. UnicodeFirstOff = SizeOf(TUnicodeRec)-sizeof(UnicodeChar);
  43. {
  44. Default UnicodeChar <-> Char conversion only converts code points
  45. below 256 directly; all other UnicodeChars are translated to '?'.
  46. These routines can be overridden for the current locale
  47. }
  { Fallback UnicodeChar -> single byte conversion.
    Writes len bytes into dest: every source character whose code is below 256
    is stored as that byte, anything else becomes '?'.
    The cp argument is accepted for signature compatibility but is not used. }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  var
    remaining : SizeInt;
    outp : PAnsiChar;
    code : word;
  begin
    setlength(dest,len);
    { SetLength leaves dest uniquely referenced, so it can be written through a raw pointer }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  { Fallback single byte -> UnicodeChar conversion.
    Sets dest to len characters, each one being the zero-extended value of the
    corresponding source byte. The cp argument is not used. }
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  var
    remaining : SizeInt;
    outp : PUnicodeChar;
  begin
    setlength(dest,len);
    { SetLength leaves dest uniquely referenced, so it can be written through a raw pointer }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=unicodechar(byte(source^));
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  { Default "character count" of a zero terminated string:
    simply the number of bytes before the terminator. }
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  begin
    Result:=length(Str);
  end;
  { Default code point length at the start of Str:
    0 when Str begins with the #0 terminator, otherwise 1.
    MaxLookAead is not consulted. }
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  begin
    if Str^=#0 then
      Result:=0
    else
      Result:=1;
  end;
  { Default standard code page lookup: returns DefaultSystemCodePage
    regardless of which standard code page is requested. }
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  begin
    { deliberately no exception here: text file handling relies on this call }
    DefaultGetStandardCodePage:=DefaultSystemCodePage;
  end;
  { Copies the currently installed string manager record into Manager. }
  Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  begin
    Manager:=widestringmanager;
  end;
  { Installs New as the active string manager and hands back the
    previously installed one in Old. }
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  begin
    { save the current manager first, then replace it }
    GetUnicodeStringManager(Old);
    widestringmanager:=New;
  end;
  { Installs New as the active string manager; the previous manager is discarded. }
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  begin
    widestringmanager:=New;
  end;
  { WideString-named twin of GetUnicodeStringManager: both read the same
    widestringmanager record. }
  Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  begin
    GetUnicodeStringManager(Manager);
  end;
  { WideString-named twin of the two-argument SetUnicodeStringManager:
    returns the current manager in Old and installs New. }
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  begin
    SetUnicodeStringManager(New,Old);
  end;
  { WideString-named twin of the one-argument SetUnicodeStringManager:
    installs New as the active string manager. }
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  begin
    SetUnicodeStringManager(New);
  end;
  121. {****************************************************************************
  122. Internal functions, not in interface.
  123. ****************************************************************************}
  { Reports a UnicodeString allocation failure: NewUnicodeString calls this
    when GetMem returns nil. Hands error code 204 and the current frame
    (get_frame) to HandleErrorFrame. }
  124. procedure UnicodeStringError;
  125. begin
  126. HandleErrorFrame(204,get_frame);
  127. end;
  {$ifdef UnicodeStrDebug}
  { Debug helper: prints the length and reference count stored in the header
    of the UnicodeString that S points to, or a notice when S is nil. }
  Procedure DumpUnicodeRec(S : Pointer);
  var
    hdr : PUnicodeRec;
  begin
    if S=Nil then
      begin
        Writeln ('String is nil');
        exit;
      end;
    { step back from the character data to the header }
    hdr:=PUnicodeRec(S-UnicodeFirstOff);
    Write ('(Len:',hdr^.Len);
    Writeln (' Ref: ',hdr^.Ref,')');
  end;
  {$endif}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a UnicodeString with room for Len characters on the heap.
    The header gets length Len, reference count 1, DefaultUnicodeCodePage and
    the element size of UnicodeChar; the first character is set to #0.
    Returns a pointer to the character data (not to the header).
    If the allocation yields nil, UnicodeStringError is called and nil is returned.
  }
  Var
    Mem : Pointer;
    Hdr : PUnicodeRec;
  begin
    GetMem(Mem,Len*sizeof(UnicodeChar)+UnicodeRecLen);
    if Mem=Nil then
      UnicodeStringError
    else
      begin
        Hdr:=PUnicodeRec(Mem);
        Hdr^.Len:=Len;
        Hdr^.Ref:=1;
        Hdr^.CodePage:=DefaultUnicodeCodePage;
        Hdr^.ElementSize:=SizeOf(UnicodeChar);
        Hdr^.First:=#0;
        { skip the header so that the result addresses the first character }
        inc(Mem,UnicodeFirstOff);
      end;
    NewUnicodeString:=Mem;
  end;
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Drops one reference from the UnicodeString S points to.
    Nil strings and strings with a negative reference count (constants) are
    left alone. When the count reaches zero the memory is released and S is
    set to nil; otherwise S keeps its value.
  }
  Var
    hdr : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        if hdr^.Ref>=0 then
          { declocked decrements atomically and reports whether zero was reached }
          if declocked(hdr^.Ref) then
            begin
              FreeMem(hdr);
              S:=nil;
            end;
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  { Adds one reference to the UnicodeString at S.
    Nil strings and strings with a negative reference count (constants)
    are left untouched. }
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  Var
    hdr : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        if hdr^.Ref>=0 then
          inclocked(hdr^.Ref);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
  {
    Converts a UnicodeString to a ShortString (function-result variant).
    At most high_of_res characters of S2 are converted, using
    DefaultSystemCodePage. An empty S2 gives an empty result.
  }
  Var
    Size : SizeInt;
    temp : ansistring;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        If Size>high_of_res then
          Size:=high_of_res;
        { the move procedure takes the target code page as its third argument,
          exactly as in the procedure variant below }
        widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString (out-parameter variant).
    At most high(res) characters of S2 are converted, using
    DefaultSystemCodePage. An empty S2 gives an empty res.
  }
  Var
    Size : SizeInt;
    temp : ansistring;
  begin
    res:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        If Size>high(res) then
          Size:=high(res);
        widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
        res:=temp;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString, interpreting the bytes of S2
    in DefaultSystemCodePage. An empty S2 gives an empty result.
  }
  Var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      { No manual terminator is written afterwards: the move procedure sizes
        the result itself. Size is the number of input bytes, which need not
        equal the number of UnicodeChars produced, so storing #0 at offset
        Size could write outside the string (or through nil when the
        conversion yields an empty string). }
      widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  end;
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString in code page cp.
    Without FPC_HAS_CPSTRING the code page is always DefaultSystemCodePage;
    CP_ACP is replaced by DefaultSystemCodePage as well.
  }
  Var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    { nothing to convert for an empty string }
    if CharCount<=0 then
      exit;
    if (cp=CP_ACP) then
      cp:=DefaultSystemCodePage;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,CharCount);
  end;
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString. The source code page is taken
    from the string itself (StringCodePage); CP_ACP is replaced by
    DefaultSystemCodePage.
  }
  Var
    ByteCount : SizeInt;
    SrcCP : TSystemCodePage;
  begin
    result:='';
    ByteCount:=Length(S2);
    { nothing to convert for an empty string }
    if ByteCount<=0 then
      exit;
    SrcCP:=StringCodePage(S2);
    if (SrcCP=CP_ACP) then
      SrcCP:=DefaultSystemCodePage;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SrcCP,result,ByteCount);
  end;
  { Converts a UnicodeString to a WideString by copying the characters verbatim. }
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  { Converts a WideString to a UnicodeString by copying the characters verbatim. }
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to an AnsiString in code
    page cp. A nil pointer or an empty buffer gives an empty string.
    Without FPC_HAS_CPSTRING the code page is always DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p=nil then
      exit;
    { number of characters before the terminating #0 }
    Size := IndexWord(p^, -1, 0);
    if Size>0 then
      begin
        { CP_ACP is a placeholder: resolve it the same way
          fpc_UnicodeStr_To_AnsiStr and fpc_UChar_To_AnsiStr do }
        if (cp=CP_ACP) then
          cp:=DefaultSystemCodePage;
        widestringmanager.Unicode2AnsiMoveProc(P,result,cp,Size);
      end;
  end;
  { Builds a UnicodeString from a zero terminated UnicodeChar buffer.
    A nil pointer gives an empty string. }
  Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        { number of characters before the terminating #0 }
        CharCount := IndexWord(p^, -1, 0);
        Setlength(result,CharCount);
        if CharCount>0 then
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
      end;
  end;
  { Builds a UnicodeString from a zero terminated WideChar buffer.
    A nil pointer gives an empty string. }
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        { number of characters before the terminating #0 }
        CharCount := IndexWord(p^, -1, 0);
        Setlength(result,CharCount);
        if CharCount>0 then
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
      end;
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  { Converts a zero terminated UnicodeChar buffer to a ShortString
    (function-result variant), using DefaultSystemCodePage.
    A nil pointer or an empty buffer gives an empty string. }
  Function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    result:='';
    if p=nil then
      exit;
    { number of characters before the terminating #0 }
    Size := IndexWord(p^, $7fffffff, 0);
    if Size>0 then
      begin
        { the move procedure takes the target code page as its third argument,
          exactly as in the procedure variant below }
        widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  { Converts a zero terminated UnicodeChar buffer to a ShortString
    (out-parameter variant), using DefaultSystemCodePage.
    A nil pointer or an empty buffer gives an empty string. }
  procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    res:='';
    if p=nil then
      exit;
    { number of characters before the terminating #0 }
    Size:=IndexWord(p^, high(PtrInt), 0);
    if Size>0 then
      begin
        widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        res:=temp;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero terminated WideChar buffer to an AnsiString in code page
    cp. A nil pointer or an empty buffer gives an empty string.
    Without FPC_HAS_CPSTRING the code page is always DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p=nil then
      exit;
    { number of characters before the terminating #0 }
    Size := IndexWord(p^, -1, 0);
    if Size>0 then
      begin
        { CP_ACP is a placeholder: resolve it the same way
          fpc_UnicodeStr_To_AnsiStr and fpc_UChar_To_AnsiStr do }
        if (cp=CP_ACP) then
          cp:=DefaultSystemCodePage;
        widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
      end;
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  { Converts a zero terminated WideChar buffer to a ShortString
    (function-result variant), using DefaultSystemCodePage.
    A nil pointer or an empty buffer gives an empty string. }
  Function fpc_PWideChar_To_ShortStr(const p : pwidechar): shortstring; compilerproc;
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    result:='';
    if p=nil then
      exit;
    { number of characters before the terminating #0 }
    Size := IndexWord(p^, $7fffffff, 0);
    if Size>0 then
      begin
        { the move procedure takes the target code page as its third argument,
          exactly as in the procedure variant below }
        widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  { Converts a zero terminated WideChar buffer to a ShortString
    (out-parameter variant), using DefaultSystemCodePage.
    A nil pointer or an empty buffer gives an empty string. }
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    res:='';
    if p=nil then
      exit;
    { number of characters before the terminating #0 }
    Size:=IndexWord(p^, high(PtrInt), 0);
    if Size>0 then
      begin
        widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        res:=temp;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Implements S1:=S2 for UnicodeStrings with reference counting:
    S2 gains a reference (only when its count is positive), the old
    value of S1 loses one, then S1 takes over the pointer.
  }
  var
    src : PUnicodeRec;
  begin
    { take the new reference before releasing the old one }
    if S2<>nil then
      begin
        src:=PUnicodeRec(S2-UnicodeFirstOff);
        if src^.Ref>0 then
          inclocked(src^.Ref);
      end;
    fpc_unicodestr_decr_ref (S1);
    S1:=S2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  444. {$ifndef STR_CONCAT_PROCS}
  { Returns S1+S2 (function-result variant).
    When either operand is empty the other one is assigned directly;
    otherwise a string of the combined length is filled with both operands. }
  function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  Var
    Len1,Len2 : SizeInt;
    dst : punicodechar;
  begin
    if (S1='') then
      result:=s2
    else if (S2='') then
      result:=s1
    else
      begin
        Len1:=Length(S1);
        Len2:=length(S2);
        SetLength(result,Len1+Len2);
        dst:=punicodechar(result);
        Move(S1[1],dst^,Len1*sizeof(UnicodeChar));
        inc(dst,Len1);
        { Len2+1: the terminating #0 of S2 is copied along }
        Move(S2[1],dst^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  { Returns the concatenation of all strings in sarr (function-result variant).
    The total length is computed first so that the result is sized once. }
  function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  Var
    idx : Longint;
    src : pointer;
    dst : punicodechar;
    PartLen,TotalLen : SizeInt;
  begin
    TotalLen:=0;
    for idx:=low(sarr) to high(sarr) do
      inc(TotalLen,length(sarr[idx]));
    SetLength(result,TotalLen);
    dst:=punicodechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        { empty strings are nil pointers and contribute nothing }
        if not assigned(src) then
          continue;
        PartLen:=length(unicodestring(src));
        { PartLen+1: the terminating #0 is copied along and overwritten by the next part }
        Move(punicodechar(src)^,dst^,(PartLen+1)*sizeof(UnicodeChar));
        inc(dst,PartLen);
      end;
  end;
  494. {$else STR_CONCAT_PROCS}
  { Stores S1+S2 in DestS (procedure variant).
    DestS may be the same string as S1 and/or S2; each aliasing case is
    handled separately so that an operand is never read after SetLength has
    resized the string it shares with DestS. }
  495. procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  496. Var
  497. Size,Location : SizeInt;
  498. same : boolean;
  499. begin
  500. { only assign if s1 or s2 is empty }
  501. if (S1='') then
  502. begin
  503. DestS:=s2;
  504. exit;
  505. end;
  506. if (S2='') then
  507. begin
  508. DestS:=s1;
  509. exit;
  510. end;
  { Location = length of S1 = offset at which S2 is placed; Size = length of S2 }
  511. Location:=Length(S1);
  512. Size:=length(S2);
  513. { Use Pointer() typecasts to prevent extra conversion code }
  { case 1: DestS is S1 - grow DestS in place and append }
  514. if Pointer(DestS)=Pointer(S1) then
  515. begin
  { remember whether S2 is that same string too, before SetLength is called }
  516. same:=Pointer(S1)=Pointer(S2);
  517. SetLength(DestS,Size+Location);
  518. if same then
  { DestS:=DestS+DestS - the source is the first half of DestS itself; only Size chars are copied }
  519. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size)*sizeof(UnicodeChar))
  520. else
  { Size+1 also copies the terminating #0 of S2 }
  521. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  522. end
  { case 2: DestS is S2 - grow, shift the old contents up by Location chars, then put S1 in front }
  523. else if Pointer(DestS)=Pointer(S2) then
  524. begin
  525. SetLength(DestS,Size+Location);
  526. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  527. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  528. end
  { case 3: no aliasing - start from an empty DestS and copy both operands }
  529. else
  530. begin
  531. DestS:='';
  532. SetLength(DestS,Size+Location);
  533. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  534. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  535. end;
  536. end;
  { Stores the concatenation of all strings in sarr in DestS (procedure variant).
    If DestS is the first array element, it is grown in place and the remaining
    elements are appended. If DestS appears at any later position, an extra
    reference keeps its old contents alive while DestS is rebuilt from scratch. }
  537. procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  538. Var
  539. i : Longint;
  540. p,pc : pointer;
  541. Size,NewLen : SizeInt;
  542. lowstart : longint;
  543. destcopy : pointer;
  544. OldDestLen : SizeInt;
  545. begin
  { NOTE(review): high(sarr)=0 means the array holds exactly one element, yet
    DestS is cleared instead of receiving that element - confirm that callers
    never pass a single-element array }
  546. if high(sarr)=0 then
  547. begin
  548. DestS:='';
  549. exit;
  550. end;
  551. destcopy:=nil;
  552. lowstart:=low(sarr);
  { DestS is the first operand: keep its contents and start appending at the second element }
  553. if Pointer(DestS)=Pointer(sarr[lowstart]) then
  554. inc(lowstart);
  555. { Check for another reuse, then we can't use
  556. the append optimization }
  557. for i:=lowstart to high(sarr) do
  558. begin
  559. if Pointer(DestS)=Pointer(sarr[i]) then
  560. begin
  561. { if DestS is used somewhere in the middle of the expression,
  562. we need to make sure the original string still exists after
  563. we empty/modify DestS.
  564. This trick only works with reference counted strings. Therefor
  565. this optimization is disabled for WINLIKEUNICODESTRING }
  566. destcopy:=pointer(dests);
  567. fpc_UnicodeStr_Incr_Ref(destcopy);
  { fall back to copying every element, including the first }
  568. lowstart:=low(sarr);
  569. break;
  570. end;
  571. end;
  572. { Start with empty DestS if we start with concatting
  573. the first array element }
  574. if lowstart=low(sarr) then
  575. DestS:='';
  { number of characters already in place at the start of DestS (0 unless appending) }
  576. OldDestLen:=length(DestS);
  577. { Calculate size of the result so we can do
  578. a single call to SetLength() }
  579. NewLen:=0;
  580. for i:=low(sarr) to high(sarr) do
  581. inc(NewLen,length(sarr[i]));
  582. SetLength(DestS,NewLen);
  583. { Concat all strings, except the string we already
  584. copied in DestS }
  { pc is the write position as a byte address }
  585. pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
  586. for i:=lowstart to high(sarr) do
  587. begin
  588. p:=pointer(sarr[i]);
  { empty strings are nil pointers and are skipped }
  589. if assigned(p) then
  590. begin
  591. Size:=length(unicodestring(p));
  { Size+1 copies the terminating #0 too; the next element overwrites it }
  592. Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
  593. inc(pc,size*sizeof(UnicodeChar));
  594. end;
  595. end;
  { release the extra reference taken above; fpc_UnicodeStr_Decr_Ref returns at once when destcopy is nil }
  596. fpc_UnicodeStr_Decr_Ref(destcopy);
  597. end;
  598. {$endif STR_CONCAT_PROCS}
  { Converts a single Char, interpreted in DefaultSystemCodePage, to a
    UnicodeChar. Returns '?' when the conversion produces no character. }
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  var
    w: unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
    { the installed conversion routine may yield an empty string (for example
      for a byte that is not a complete character in the code page); indexing
      w[1] would then read through a nil pointer }
    if length(w)>=1 then
      fpc_Char_To_UChar:=w[1]
    else
      fpc_Char_To_UChar:='?';
  end;
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Builds a one-character UnicodeString from a Char.
  }
  begin
    Setlength(Result,1);
    Result[1]:=c;
    { store the terminating #0 right behind the single character }
    PUnicodeChar(Pointer(Result)+sizeof(UnicodeChar))^:=#0;
  end;
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char in DefaultSystemCodePage.
    Yields '?' unless the conversion produces exactly one byte.
  }
  var
    converted: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    if length(converted)<>1 then
      fpc_UChar_To_Char:='?'
    else
      fpc_UChar_To_Char:=converted[1];
  end;
  Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  {
    Builds a one-character UnicodeString from a WideChar.
  }
  begin
    Setlength (fpc_WChar_To_UnicodeStr,1);
    fpc_WChar_To_UnicodeStr[1]:= c;
  end;
  { Converts a single Char, interpreted in DefaultSystemCodePage, to a
    WideChar. Returns '?' when the conversion produces no character. }
  Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
  var
    w: widestring;
  begin
    widestringmanager.Ansi2WideMoveProc(@c,DefaultSystemCodePage,w,1);
    { the installed conversion routine may yield an empty string (for example
      for a byte that is not a complete character in the code page); indexing
      w[1] would then read through a nil pointer }
    if length(w)>=1 then
      fpc_Char_To_WChar:=w[1]
    else
      fpc_Char_To_WChar:='?';
  end;
  Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
  {
    Converts a WideChar to a Char in DefaultSystemCodePage.
    Yields '?' unless the conversion produces exactly one byte.
  }
  var
    converted: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    if length(converted)<>1 then
      fpc_WChar_To_Char:='?'
    else
      fpc_WChar_To_Char:=converted[1];
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
  {
    Converts a WideChar to a ShortString (function-result variant),
    using DefaultSystemCodePage.
  }
  var
    s: ansistring;
  begin
    { the move procedure takes the target code page as its third argument,
      exactly as in the procedure variant below }
    widestringmanager.Wide2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
    fpc_WChar_To_ShortStr:= s;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar); compilerproc;
  {
    Converts a WideChar to a ShortString (out-parameter variant),
    using DefaultSystemCodePage.
  }
  var
    s: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
    res:=s;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Builds a one-character UnicodeString from a UnicodeChar.
  }
  begin
    Setlength (Result,1);
    Result[1]:= c;
  end;
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts one UnicodeChar to an AnsiString in code page cp.
    Without FPC_HAS_CPSTRING there is no cp parameter and
    DefaultSystemCodePage is used; with it, CP_ACP is replaced by
    DefaultSystemCodePage before converting.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifdef FPC_HAS_CPSTRING}
    if cp=CP_ACP then
      cp:=DefaultSystemCodePage;
  {$else FPC_HAS_CPSTRING}
    { the CP_ACP check would only reassign the same value here }
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  704. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  {
    Converts a UnicodeChar to a ShortString.
    The conversion goes through an intermediate ansistring produced by the
    widestring manager; DefaultSystemCodePage is passed as the target code
    page, matching the procedure form of this routine and every other
    Unicode2AnsiMoveProc call in this file.
  }
  var
    s: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
    fpc_UChar_To_ShortStr:= s;
  end;
  715. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  {
    Stores the conversion of UnicodeChar c into res. The character is first
    converted to an ansistring (DefaultSystemCodePage) and then assigned.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,converted,DefaultSystemCodePage,1);
    res:=converted;
  end;
  726. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a null-terminated PChar to a UnicodeString using
    DefaultSystemCodePage. A nil pointer or an empty string gives ''.
  }
  var
    byteCount : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { number of characters in front of the terminating #0 }
        byteCount:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(p,DefaultSystemCodePage,Result,byteCount);
      end
    else
      Result:='';
  end;
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a char array to a UnicodeString using DefaultSystemCodePage.
    When zerobased is true the array is treated as null-terminated: only the
    characters in front of the first #0 are converted (all of them when no
    #0 is present). Otherwise the whole array is converted.
  }
  var
    count, nulPos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            Result:='';
            exit;
          end;
        nulPos:=IndexChar(arr,high(arr)+1,#0);
        if nulPos<>-1 then
          count:=nulPos;
      end;
    SetLength(Result,count);
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,Result,count);
  end;
  759. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a unicodechar array to a ShortString.
    At most 255 source characters are considered. When zerobased is true the
    conversion stops in front of the first null character found in that range.
    DefaultSystemCodePage is passed as the target code page, matching the
    procedure form of this routine.
  }
  var
    l: longint;
    index: longint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    if l>=256 then
      l:=255
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if (index < 0) then
          len := l
        else
          len := index;
      end
    else
      len := l;
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
    fpc_UnicodeCharArray_To_ShortStr := temp;
  end;
  785. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a unicodechar array into res using DefaultSystemCodePage.
    The number of source characters considered is limited to high(res).
    When zerobased is true the conversion stops in front of the first null
    character found in that range.
  }
  var
    avail : longint;
    nulPos : ptrint;
    count : byte;
    converted : ansistring;
  begin
    avail:=high(arr)+1;
    if avail>high(res) then
      avail:=high(res)
    else if avail<0 then
      avail:=0;
    count:=avail;
    if zerobased then
      begin
        nulPos:=IndexWord(arr[0],avail,0);
        if nulPos>=0 then
          count:=nulPos;
      end;
    widestringmanager.Unicode2AnsiMoveProc(punicodechar(@arr),converted,DefaultSystemCodePage,count);
    res:=converted;
  end;
  811. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING}zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts a unicodechar array to an AnsiString in code page cp
    (DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined).
    When zerobased is true only the characters in front of the first null
    character are converted; otherwise the whole array is.
  }
  var
    count, nulPos : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    count:=high(arr)+1;
    if zerobased then
      begin
        nulPos:=IndexWord(arr,high(arr)+1,0);
        if nulPos<>-1 then
          count:=nulPos;
      end;
    SetLength(Result,count);
    widestringmanager.Unicode2AnsiMoveProc(punicodechar(@arr),Result,cp,count);
  end;
  Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a unicodechar array into a UnicodeString.
    When zerobased is true only the characters in front of the first null
    character are copied; otherwise the whole array is.
  }
  var
    count, nulPos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulPos:=IndexWord(arr,high(arr)+1,0);
        if nulPos<>-1 then
          count:=nulPos;
      end;
    SetLength(Result,count);
    Move(arr[0],Pointer(Result)^,count*sizeof(UnicodeChar));
  end;
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a widechar array into a UnicodeString.
    When zerobased is true only the characters in front of the first null
    character are copied; otherwise the whole array is.
  }
  var
    count, nulPos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulPos:=IndexWord(arr,high(arr)+1,0);
        if nulPos<>-1 then
          count:=nulPos;
      end;
    SetLength(Result,count);
    Move(arr[0],Pointer(Result)^,count*sizeof(WideChar));
  end;
  863. { due to their names, the following procedures should be in wstrings.inc,
  864. however, the compiler generates code using these functions on all platforms }
  865. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a widechar array to a ShortString.
    At most 255 source characters are considered. When zerobased is true the
    conversion stops in front of the first null character found in that range.
    DefaultSystemCodePage is passed as the target code page, matching the
    procedure form of this routine.
  }
  var
    l: longint;
    index: longint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    if l>=256 then
      l:=255
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if (index < 0) then
          len := l
        else
          len := index;
      end
    else
      len := l;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
    fpc_WideCharArray_To_ShortStr := temp;
  end;
  891. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a widechar array into res using DefaultSystemCodePage.
    The number of source characters considered is limited to high(res).
    When zerobased is true the conversion stops in front of the first null
    character found in that range.
  }
  var
    avail : longint;
    nulPos : ptrint;
    count : byte;
    converted : ansistring;
  begin
    avail:=high(arr)+1;
    if avail>high(res) then
      avail:=high(res)
    else if avail<0 then
      avail:=0;
    count:=avail;
    if zerobased then
      begin
        nulPos:=IndexWord(arr[0],avail,0);
        if nulPos>=0 then
          count:=nulPos;
      end;
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),converted,DefaultSystemCodePage,count);
    res:=converted;
  end;
  917. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts a widechar array to an AnsiString in code page cp
    (DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined).
    When zerobased is true only the characters in front of the first null
    character are converted; otherwise the whole array is.
  }
  var
    count, nulPos : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    count:=high(arr)+1;
    if zerobased then
      begin
        nulPos:=IndexWord(arr,high(arr)+1,0);
        if nulPos<>-1 then
          count:=nulPos;
      end;
    SetLength(Result,count);
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),Result,cp,count);
  end;
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies a widechar array into a WideString.
    When zerobased is true only the characters in front of the first null
    character are copied; otherwise the whole array is.
  }
  var
    count, nulPos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulPos:=IndexWord(arr,high(arr)+1,0);
        if nulPos<>-1 then
          count:=nulPos;
      end;
    SetLength(Result,count);
    Move(arr[0],Pointer(Result)^,count*sizeof(WideChar));
  end;
  954. {$ifndef FPC_STRTOCHARARRAYPROC}
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM) }
  function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
  {
    Converts src to single-byte characters (DefaultSystemCodePage) and copies
    at most arraysize of them into the result; the remainder of the result
    is filled with zero bytes.
    DefaultSystemCodePage is passed explicitly, matching the procedure form
    of this routine.
  }
  var
    len: SizeInt;
    temp: ansistring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    move(temp[1],fpc_unicodestr_to_chararray[0],len);
    fillchar(fpc_unicodestr_to_chararray[len],arraysize-len,0);
  {$pop}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { unicodechararray we're converting to (JM) }
  function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Copies at most arraysize characters of src into the result and fills
    the remaining elements with zero.
  }
  var
    count : SizeInt;
  begin
    count:=length(src);
    if arraysize<count then
      count:=arraysize;
  {$push}
  {$r-}
    { src[1] is only referenced when there is something to copy }
    if count>0 then
      move(src[1],fpc_unicodestr_to_unicodechararray[0],count*SizeOf(UnicodeChar));
    fillchar(fpc_unicodestr_to_unicodechararray[count],(arraysize-count)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM) }
  function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Converts src to unicode characters and copies at most arraysize of them
    into the result; the remaining elements are filled with zero.
    DefaultSystemCodePage is passed as the source code page, matching the
    procedure form of this routine.
  }
  var
    len: SizeInt;
    temp: unicodestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    move(temp[1],fpc_ansistr_to_unicodechararray[0],len*sizeof(unicodechar));
    fillchar(fpc_ansistr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Converts src to unicode characters and copies at most arraysize of them
    into the result; the remaining elements are filled with zero.
    DefaultSystemCodePage is passed as the source code page, matching the
    procedure form of this routine.
  }
  var
    len: longint;
    temp : unicodestring;
  begin
    len := length(src);
    { make sure we don't access char 1 if length is 0 (JM) }
    if len > 0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    move(temp[1],fpc_shortstr_to_unicodechararray[0],len*sizeof(unicodechar));
    fillchar(fpc_shortstr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  1030. {$else ndef FPC_STRTOCHARARRAYPROC}
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  {
    Converts src to single-byte characters (DefaultSystemCodePage), copies as
    many as fit into res and fills the rest of res with zero bytes.
  }
  var
    count : SizeInt;
    converted : ansistring;
  begin
    { src[1] is only referenced when src is not empty }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,DefaultSystemCodePage,length(src));
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],count);
    fillchar(res[count],length(res)-count,0);
  {$pop}
  end;
  procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  {
    Copies as many characters of src as fit into res and fills the rest of
    res with zero.
  }
  var
    count : SizeInt;
  begin
    count:=length(src);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    { src[1] is only referenced when there is something to copy }
    if count>0 then
      move(src[1],res[0],count*SizeOf(UnicodeChar));
    fillchar(res[count],(length(res)-count)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to unicode characters,
    copies as many as fit into res and fills the rest of res with zero.
  }
  var
    count : SizeInt;
    converted : unicodestring;
  begin
    { src[1] is only referenced when src is not empty }
    if length(src)>0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,length(src));
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],count*sizeof(unicodechar));
    fillchar(res[count],(length(res)-count)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to unicode characters,
    copies as many as fit into res and fills the rest of res with zero.
  }
  var
    count : longint;
    converted : unicodestring;
  begin
    count:=length(src);
    { src[1] is only referenced when src is not empty }
    if count>0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,count);
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],count*sizeof(unicodechar));
    fillchar(res[count],(length(res)-count)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src, using the code page reported by StringCodePage(src), to
    wide characters, copies as many as fit into res and fills the rest of
    res with zero.
  }
  var
    count : SizeInt;
    converted : widestring;
  begin
    { src[1] is only referenced when src is not empty }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),converted,length(src));
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],count*sizeof(widechar));
    fillchar(res[count],(length(res)-count)*SizeOf(WideChar),0);
  {$pop}
  end;
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to wide characters,
    copies as many as fit into res and fills the rest of res with zero.
  }
  var
    count : longint;
    converted : widestring;
  begin
    count:=length(src);
    { src[1] is only referenced when src is not empty }
    if count>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,count);
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],count*sizeof(widechar));
    fillchar(res[count],(length(res)-count)*SizeOf(WideChar),0);
  {$pop}
  end;
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies as many characters of src as fit into res and fills the rest of
    res with zero.
  }
  var
    count : SizeInt;
  begin
    count:=length(src);
    if length(res)<count then
      count:=length(res);
  {$push}
  {$r-}
    { src[1] is only referenced when there is something to copy }
    if count>0 then
      move(src[1],res[0],count*SizeOf(WideChar));
    fillchar(res[count],(length(res)-count)*SizeOf(WideChar),0);
  {$pop}
  end;
  1151. {$endif ndef FPC_STRTOCHARARRAYPROC}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings:
      <0 when S1<S2, 0 when S1=S2, >0 when S1>S2.
    The common prefix is compared with CompareWord; when it is equal the
    difference of the lengths decides.
  }
  var
    commonLen, diff : SizeInt;
  begin
    { both variables refer to the same string data }
    if pointer(S1)=pointer(S2) then
      exit(0);
    commonLen:=Length(S1);
    if Length(S2)<commonLen then
      commonLen:=Length(S2);
    diff:=CompareWord(S1[1],S2[1],commonLen);
    if diff=0 then
      diff:=Length(S1)-Length(S2);
    Result:=diff;
  end;
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
      0 when S1=S2, a non-zero value otherwise.
    Strings of different length give -1 without comparing contents.
  }
  var
    len : SizeInt;
  begin
    { both variables refer to the same string data }
    if pointer(S1)=pointer(S2) then
      exit(0);
    len:=Length(S1);
    if len=Length(S2) then
      Result:=CompareWord(S1[1],S2[1],len)
    else
      Result:=-1;
  end;
  1195. {$ifdef VER2_4}
  // obsolete but needed for bootstrapping with 2.4
  Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  { Raises error 201 through HandleErrorFrame when p is nil. }
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  { Raises error 201 through HandleErrorFrame when index is outside 1..len. }
  begin
    if (index<1) or (index>len) then
      HandleErrorFrame(201,get_frame);
  end;
  1207. {$else VER2_4}
  Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Raises error 201 through HandleErrorFrame when p is nil or index is
    outside 1..len, where len is read from the string header in front of p.
  }
  begin
    { the nil test must come first: the header is only read for non-nil p }
    if (p=nil) or (index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorFrame(201,get_frame);
  end;
  1213. {$endif VER2_4}
  1214. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  1215. {
  1216. Sets The length of string S to L.
  1217. Makes sure S is unique, and contains enough room.
  1218. }
  { For l>0 three cases are handled: S is nil (a new string is allocated),
    S has a reference count of 1 (the existing block is resized in place),
    or S has any other reference count (a new block is allocated, the
    contents are copied and one reference to the old block is released).
    For l<=0 the reference is released and S becomes nil. }
  1219. Var
  1220. Temp : Pointer;
  { number of characters copied into a freshly allocated block }
  1221. movelen: SizeInt;
  { lens: size reported by MemSize for the current block;
    lena: bytes needed for l characters plus UnicodeRecLen }
  1222. lens, lena : SizeUInt;
  1223. begin
  1224. if (l>0) then
  1225. begin
  1226. if Pointer(S)=nil then
  1227. begin
  1228. { Need a complete new string...}
  1229. Pointer(s):=NewUnicodeString(l);
  1230. end
  1231. else
  1232. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  1233. begin
  { step back to the start of the allocation so MemSize/reallocmem see
    the real block pointer; undone by the Inc below }
  1234. Dec(Pointer(S),UnicodeFirstOff);
  1235. lens:=MemSize(Pointer(s));
  1236. lena:=SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen);
  { reallocate when the block has to grow, or when it is larger than
    32 bytes and at most half of it would still be needed }
  1237. if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
  1238. reallocmem(pointer(S), lena);
  1239. Inc(Pointer(S), UnicodeFirstOff);
  1240. end
  1241. else
  1242. begin
  1243. { Reallocation is needed... }
  1244. Temp:=Pointer(NewUnicodeString(L));
  1245. if Length(S)>0 then
  1246. begin
  { copy min(l, length(s)+1) characters }
  1247. if l < succ(length(s)) then
  1248. movelen := l
  1249. { also move terminating null }
  1250. else
  1251. movelen := succ(length(s));
  1252. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  1253. end;
  { release this variable's reference to the old data, then point S at
    the new block }
  1254. fpc_unicodestr_decr_ref(Pointer(S));
  1255. Pointer(S):=Temp;
  1256. end;
  1257. { Force nil termination in case it gets shorter }
  1258. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  { store the new length in the string header }
  1259. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  1260. end
  1261. else
  1262. begin
  1263. { Length=0 }
  1264. if Pointer(S)<>nil then
  1265. fpc_unicodestr_decr_ref (Pointer(S));
  1266. Pointer(S):=Nil;
  1267. end;
  1268. end;
  1269. {*****************************************************************************
  1270. Public functions, In interface.
  1271. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  {
    Returns the null-terminated string S as a UnicodeString. The length is
    obtained by converting S to a UnicodeString and taking its Length.
  }
  var
    charCount : SizeInt;
  begin
    charCount:=Length(UnicodeString(S));
    Result:=UnicodeCharLenToString(S,charCount);
  end;
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  {
    Converts Src (using the code page reported by StringCodePage(Src)) to
    unicode characters and stores them, null-terminated, in the buffer Dest.

    DestSize is the capacity of Dest in characters, including the terminator;
    at most DestSize-1 characters are copied. The terminator is written
    directly after the last copied character, so a short string does not
    leave uninitialised characters between its end and the terminator.

    Returns Dest. When Dest is nil or DestSize<=0 nothing is written.
  }
  var
    temp : unicodestring;
    copyLen : SizeInt;
  begin
    result:=Dest;
    { no room for even the terminator: do not touch the buffer }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    copyLen:=Length(temp);
    { keep one element free for the terminator }
    if copyLen>=DestSize then
      copyLen:=DestSize-1;
    if copyLen>0 then
      move(temp[1],Dest^,copyLen*SizeOf(UnicodeChar));
    Dest[copyLen]:=#0;
  end;
  function WideCharToString(S : PWideChar) : UnicodeString;
  {
    Returns the null-terminated string S as a UnicodeString. The length is
    obtained by converting S to a WideString and taking its Length.
  }
  var
    charCount : SizeInt;
  begin
    charCount:=Length(WideString(S));
    Result:=WideCharLenToString(S,charCount);
  end;
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (using the code page reported by StringCodePage(Src)) to
    wide characters and stores them, null-terminated, in the buffer Dest.

    DestSize is the capacity of Dest in characters, including the terminator;
    at most DestSize-1 characters are copied. The terminator is written
    directly after the last copied character, so a short string does not
    leave uninitialised characters between its end and the terminator.

    Returns Dest. When Dest is nil or DestSize<=0 nothing is written.
  }
  var
    temp : widestring;
    copyLen : SizeInt;
  begin
    result:=Dest;
    { no room for even the terminator: do not touch the buffer }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    copyLen:=Length(temp);
    { keep one element free for the terminator }
    if copyLen>=DestSize then
      copyLen:=DestSize-1;
    if copyLen>0 then
      move(temp[1],Dest^,copyLen*SizeOf(WideChar));
    Dest[copyLen]:=#0;
  end;
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len characters at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    { two bytes per character }
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*2);
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters at Src in Dest (UnicodeString overload). }
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharLenToString(Src,Len);
    Dest:=converted;
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  {
    Stores the first Len characters at Src in Dest (AnsiString overload);
    the UnicodeString result is converted on assignment.
  }
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharLenToString(Src,Len);
    Dest:=converted;
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  {
    Stores the null-terminated string S in Dest; the UnicodeString result of
    UnicodeCharToString is converted on assignment.
  }
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharToString(S);
    Dest:=converted;
  end;
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len characters at S. }
  begin
    SetLength(WideCharLenToString,Len);
    { two bytes per character }
    Move(S^,Pointer(WideCharLenToString)^,Len*2);
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters at Src in Dest (UnicodeString overload). }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharLenToString(Src,Len);
    Dest:=converted;
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  {
    Stores the first Len characters at Src in Dest (AnsiString overload);
    the UnicodeString result is converted on assignment.
  }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharLenToString(Src,Len);
    Dest:=converted;
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Stores the null-terminated string S in Dest (UnicodeString overload). }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharToString(S);
    Dest:=converted;
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  {
    Stores the null-terminated string S in Dest (AnsiString overload); the
    UnicodeString result of WideCharToString is converted on assignment.
  }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharToString(S);
    Dest:=converted;
  end;
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Ensures that S refers to string data with a reference count of 1.
    When the count differs from 1 the data (including the terminating null)
    is copied into a new string, one reference to the old data is released
    and S is redirected to the copy. Returns the resulting value of S;
    a nil S is returned unchanged.
  }
  var
    fresh : Pointer;
    charCount : SizeInt;
  begin
    Result:=S;
    if S=nil then
      exit;
    if PUnicodeRec(S-UnicodeFirstOff)^.Ref=1 then
      exit;
    charCount:=PUnicodeRec(S-UnicodeFirstOff)^.len;
    fresh:=NewUnicodeString(charCount);
    { charCount+1: the terminating null is copied as well }
    Move(PUnicodeChar(S)^,fresh^,(charCount+1)*sizeof(UnicodeChar));
    PUnicodeRec(fresh-UnicodeFirstOff)^.len:=charCount;
    fpc_unicodestr_decr_ref(S);
    S:=fresh;
    Result:=fresh;
  end;
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns the substring of S that starts at the 1-based position Index and
    is at most Size characters long. An Index below 1 is treated as 1; Size
    is reduced so that the substring ends at the end of S. When nothing
    remains to copy the result is the empty string.
  }
  var
    fresh : Pointer;
  begin
    fresh:=nil;
    { switch to a zero-based offset, clamped at the start of the string }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Both tests are needed: Size can be maxint, in which case Index+Size
      becomes negative. This also covers an empty S. }
    if (Size>Length(S)) or (Index+Size>Length(S)) then
      Size:=Length(S)-Index;
    if Size>0 then
      begin
        fresh:=Pointer(NewUnicodeString(Size));
        if fresh<>nil then
          begin
            Move(PUnicodeChar(S)[Index],fresh^,Size*sizeof(UnicodeChar));
            PUnicodeRec(fresh-UnicodeFirstOff)^.Len:=Size;
            PUnicodeChar(fresh+Size*sizeof(UnicodeChar))^:=#0;
          end;
      end;
    { release whatever the result variable held before installing the copy }
    fpc_unicodestr_decr_ref(Pointer(Result));
    Pointer(Result):=fresh;
  end;
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of Substr in Source,
    or 0 when Substr is empty or does not occur.
  }
  var
    i, needleLen, lastStart : SizeInt;
    cursor : punicodechar;
  begin
    Result:=0;
    needleLen:=Length(Substr);
    if needleLen=0 then
      exit;
    { highest zero-based offset at which Substr still fits into Source }
    lastStart:=Length(Source)-needleLen;
    cursor:=@Source[1];
    for i:=1 to lastStart+1 do
      begin
        { cheap first-character test before the full comparison }
        if (Substr[1]=cursor^) and
           (CompareWord(Substr[1],cursor^,needleLen)=0) then
          exit(i);
        inc(cursor);
      end;
  end;
  { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  { Returns the 1-based position of the first c in s, or 0 when absent. }
  var
    i, total : SizeInt;
    cursor : punicodechar;
  begin
    cursor:=@s[1];
    total:=length(s);
    i:=1;
    while i<=total do
      begin
        if cursor^=c then
          exit(i);
        inc(cursor);
        inc(i);
      end;
    Result:=0;
  end;
  Function Pos (c : RawByteString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts c to a UnicodeString and searches for it in s. }
  begin
    Pos:=Pos(UnicodeString(c),s);
  end;
  Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts c to a UnicodeString and searches for it in s. }
  begin
    Pos:=Pos(UnicodeString(c),s);
  end;
  Function Pos (c : UnicodeString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts s to a UnicodeString and searches for c in it. }
  begin
    Pos:=Pos(c,UnicodeString(s));
  end;
  { Faster version for a char alone. Must be implemented because   }
  { pos(c: char; const s: shortstring) also exists, so otherwise   }
  { using pos(char,pchar) will always call the shortstring version }
  { (exact match for first argument), also with $h+ (JM)           }
  Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c in s, or 0 when
    absent. c is converted to a unicodechar once, before the scan.
  }
  var
    i, total : SizeInt;
    needle : unicodechar;
    cursor : punicodechar;
  begin
    needle:=c;
    cursor:=@s[1];
    total:=length(s);
    i:=1;
    while i<=total do
      begin
        if cursor^=needle then
          exit(i);
        inc(cursor);
        inc(i);
      end;
    Result:=0;
  end;
  Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes up to Size characters from S, starting at the 1-based position
    Index. Nothing happens when Index is outside 1..Length(S) or Size<=0.
    A Size reaching past the end of S removes everything from Index on.
  }
  var
    oldLen : SizeInt;
  begin
    oldLen:=Length(S);
    if (Index<=0) or (Index>oldLen) or (Size<=0) then
      exit;
    UniqueString(S);
    { compared this way because (Size+Index) overflows for Size=MaxInt }
    if Size>oldLen-Index then
      { the tail is removed: truncating below is enough }
      Size:=oldLen-Index+1
    else
      begin
        { shift the remainder, including the terminating null, to the left }
        Dec(Index);
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(oldLen-Index-Size+1)*sizeof(UnicodeChar));
      end;
    SetLength(S,oldLen-Size);
  end;
  Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the 1-based position Index.
    Index is clamped to 1..Length(S)+1, so out-of-range values prepend or
    append. An empty Source leaves S untouched.
  }
  var
    merged : UnicodeString;
    oldLen, addLen : SizeInt;
  begin
    addLen:=Length(Source);
    if addLen=0 then
      exit;
    if Index<=0 then
      Index:=1;
    oldLen:=Length(S);
    if Index>oldLen then
      Index:=oldLen+1;
    { from here on Index is the zero-based insertion offset }
    Dec(Index);
    Pointer(merged):=NewUnicodeString(addLen+oldLen);
    SetLength(merged,addLen+oldLen);
    { head of S, then Source, then the tail of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(merged)[Index],addLen*sizeof(UnicodeChar));
    if oldLen>Index then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(merged)[addLen+Index],(oldLen-Index)*sizeof(UnicodeChar));
    S:=merged;
  end;
  Function UpCase(c:UnicodeChar):UnicodeChar;
  {
    Upper-cases a single character by passing a one-character string to the
    widestring manager's UpperUnicodeStringProc and returning the first
    character of its result.
  }
  var
    single : UnicodeString;
  begin
    single:=c;
    UpCase:=widestringmanager.UpperUnicodeStringProc(single)[1];
  end;
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns the result of the widestring manager's UpperUnicodeStringProc for s. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  {
    Sets S to length Len and, when Buf is not nil and Len>0, copies Len
    characters from Buf into it.
  }
  begin
    SetLength(S,Len);
    if (Buf=nil) or (Len<=0) then
      exit;
    Move(Buf^,S[1],Len*sizeof(UnicodeChar));
  end;
  Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  {
    Sets S to length Len and, when Buf is not nil and Len>0, converts Len
    single-byte characters at Buf (interpreted in DefaultSystemCodePage)
    into S through the widestring manager.
  }
  begin
    SetLength(S,Len);
    If (Buf<>Nil) and (Len>0) then
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1535. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  {
    Val for real values on a UnicodeString: S is assigned to a short string
    and handed to Val. Strings longer than 255 characters give result 0 and
    Code=256.
  }
  var
    narrow : String;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        narrow:=S;
        Val(narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1549. {$endif}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  {
    Val for enumeration values on a UnicodeString: s is assigned to a short
    string and handed to Val. Strings longer than 255 characters give
    code=256; the result is then set to 0 (it was previously left undefined),
    as the other fpc_Val_*_UnicodeStr helpers in this file do.

    NOTE(review): str2ordindex is not used here, so Val is called on a
    longint result rather than with the enumeration's lookup table --
    confirm against the shortstring/ansistring enum helpers.
  }
  var ss:shortstring;
  begin
    if length(s)>255 then
      begin
        fpc_val_enum_unicodestr:=0;
        code:=256;
      end
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  {
    Val for Currency values on a UnicodeString: S is assigned to a short
    string and handed to Val. Strings longer than 255 characters give
    result 0 and Code=256.
  }
  var
    narrow : String;
  begin
    if length(S)<=255 then
      begin
        narrow:=S;
        Val(narrow,Result,Code);
      end
    else
      begin
        Result:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  {
    Val for unsigned integers on a UnicodeString: S is assigned to a
    ShortString and handed to Val. Strings longer than 255 characters give
    result 0 and Code=256.
  }
  var
    narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        narrow:=S;
        Val(narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  {
    Val for signed integers on a UnicodeString: S is assigned to a
    ShortString and handed, together with DestSize, to
    int_Val_SInt_ShortStr. Strings longer than 255 characters give result 0
    and Code=256.
  }
  var
    narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        narrow:=S;
        Result:=int_Val_SInt_ShortStr(DestSize,narrow,Code);
      end
    else
      Code:=256;
  end;
  1602. {$ifndef CPU64}
  { Val() helper for qword targets (compiled only when CPU64 is not defined).
    S    : text to parse.
    Code : receives 256 when S is longer than 255 characters, otherwise the
           code reported by Val().
    Returns the parsed value; 0 when S is longer than 255 characters. }
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
    Var
      Txt : ShortString;
    begin
      result:=0;
      if length(S)<=255 then
        begin
          Txt:=S;
          Val(Txt,result,Code);
        end
      else
        code:=256;
    end;
  { Val() helper for Int64 targets (compiled only when CPU64 is not defined).
    S    : text to parse.
    Code : receives 256 when S is longer than 255 characters, otherwise the
           code reported by Val().
    Returns the parsed value; 0 when S is longer than 255 characters. }
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
    Var
      Txt : ShortString;
    begin
      result:=0;
      if length(S)<=255 then
        begin
          Txt:=S;
          Val(Txt,result,Code);
        end
      else
        code:=256;
    end;
  1629. {$endif CPU64}
  1630. {$ifndef FPUNONE}
  { Str() helper for floating point values: formats d through str_real into a
    short string and assigns that text to s. len, fr and rt are forwarded to
    str_real, rt being cast to treal_type. }
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
    var
      formatted : shortstring;
    begin
      str_real(len,fr,d,treal_type(rt),formatted);
      s:=formatted;
    end;
  1638. {$endif}
  { Str() helper for enumeration values: delegates to fpc_shortstr_enum with
    all arguments forwarded unchanged and assigns the resulting short string
    to s. }
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
    var
      formatted : shortstring;
    begin
      fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,formatted);
      s:=formatted;
    end;
  { Str() helper for boolean values: delegates to fpc_shortstr_bool and
    assigns the resulting short string to s. }
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
    var
      formatted : shortstring;
    begin
      fpc_shortstr_bool(b,len,formatted);
      s:=formatted;
    end;
  1651. {$ifdef FPC_HAS_STR_CURRENCY}
  { Str() helper for Currency values: formats c with field width len and fr
    decimals into a short string and assigns that text to s. }
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
    var
      formatted : shortstring;
    begin
      str(c:len:fr,formatted);
      s:=formatted;
    end;
  1659. {$endif FPC_HAS_STR_CURRENCY}
  { Str() helper for ValSint values: formats v with field width Len into a
    short string and assigns that text to S. }
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  { Str() helper for ValUInt values: formats v with field width Len into a
    short string and assigns that text to S. }
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  1674. {$ifndef CPU64}
  { Str() helper for Int64 values (compiled only when CPU64 is not defined):
    formats v with field width Len into a short string and assigns that text
    to S. }
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  { Str() helper for Qword values (compiled only when CPU64 is not defined):
    formats v with field width Len into a short string and assigns that text
    to S. }
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
    Var
      Formatted : ShortString;
    begin
      Str(v:Len,Formatted);
      S:=Formatted;
    end;
  1689. {$endif CPU64}
  { Converts the UTF-16 code unit, or surrogate pair, at S[index] to UTF-32.
    S     : source string.
    index : 1-based position of the code unit to decode.
    len   : receives the number of UTF-16 code units consumed (1 or 2).
    Returns the decoded code point. A surrogate that is not the first half of
    a valid pair is returned unchanged with len = 1. }
  function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
    var
      lead,
      trail : unicodechar;
    begin
      lead:=s[index];
      { default: a single code unit passed through as is; this covers the
        ranges #$0-#$D7FF and #$E000-#$FFFF as well as unpaired surrogates }
      result:=UCS4Char(lead);
      len:=1;
      { only a high surrogate followed by another code unit can start a pair }
      if (lead>=#$d800) and
         (lead<=#$dbff) and
         (index<length(s)) then
        begin
          trail:=s[index+1];
          if (trail>=#$dc00) and
             (trail<=#$dfff) then
            begin
              { convert the surrogate pair to UTF-32 }
              result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
              len:=2;
            end;
        end;
    end;
  { Convenience overload for #0-terminated UTF-16 input: determines the source
    length with IndexWord and forwards to the four-argument UnicodeToUtf8.
    Returns 0 when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=0;
      if Source<>nil then
        Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
    end;
  { Converts SourceChars UTF-16 code units starting at Source into UTF-8.

    Dest         : output buffer; when nil nothing is written and only the
                   required size is computed.
    MaxDestBytes : capacity of Dest in bytes, including the terminating #0.
    Source       : UTF-16 input; nil yields 0.
    SourceChars  : number of UTF-16 code units to convert.

    Returns the number of bytes stored (or required when Dest is nil),
    counting one extra byte for the terminating #0. Returns 0 when Source is
    nil, or when Dest is assigned but MaxDestBytes is 0 because then not even
    the terminator fits.

    Surrogates that are not part of a valid high/low pair produce no output
    bytes. Conversion stops early when the next sequence does not fit. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
    var
      i,j : SizeUInt;
      w : word;
      lw : longword;
    begin
      result:=0;
      if source=nil then
        exit;
      i:=0;
      j:=0;
      if assigned(Dest) then
        begin
          { without this guard MaxDestBytes-1 below wraps around and the
            terminating #0 would be stored outside the buffer }
          if MaxDestBytes=0 then
            exit;
          while (i<SourceChars) and (j<MaxDestBytes) do
            begin
              w:=word(Source[i]);
              case w of
                0..$7f:
                  begin
                    Dest[j]:=char(w);
                    inc(j);
                  end;
                $80..$7ff:
                  begin
                    if j+1>=MaxDestBytes then
                      break;
                    Dest[j]:=char($c0 or (w shr 6));
                    Dest[j+1]:=char($80 or (w and $3f));
                    inc(j,2);
                  end;
                $800..$d7ff,$e000..$ffff:
                  begin
                    if j+2>=MaxDestBytes then
                      break;
                    Dest[j]:=char($e0 or (w shr 12));
                    Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                    Dest[j+2]:=char($80 or (w and $3f));
                    inc(j,3);
                  end;
                $d800..$dbff:
                  {High Surrogates}
                  begin
                    if j+3>=MaxDestBytes then
                      break;
                    if (i+1<SourceChars) and
                       (word(Source[i+1]) >= $dc00) and
                       (word(Source[i+1]) <= $dfff) then
                      begin
                        { combine the pair arithmetically; no temporary
                          string is needed for this }
                        lw:=((longword(w)-$d800) shl 10) + (longword(word(Source[i+1]))-$dc00) + $10000;
                        Dest[j]:=char($f0 or (lw shr 18));
                        Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                        Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                        Dest[j+3]:=char($80 or (lw and $3f));
                        inc(j,4);
                        { skip the low surrogate as well }
                        inc(i);
                      end;
                  end;
              end;
              inc(i);
            end;

          { keep the terminator inside the buffer; MaxDestBytes>=1 here }
          if j>=MaxDestBytes then
            j:=MaxDestBytes-1;

          Dest[j]:=#0;
        end
      else
        begin
          { size computation only: mirror the byte counts used above }
          while i<SourceChars do
            begin
              case word(Source[i]) of
                $0..$7f:
                  inc(j);
                $80..$7ff:
                  inc(j,2);
                $800..$d7ff,$e000..$ffff:
                  inc(j,3);
                $d800..$dbff:
                  begin
                    if (i+1<SourceChars) and
                       (word(Source[i+1]) >= $dc00) and
                       (word(Source[i+1]) <= $dfff) then
                      begin
                        inc(j,4);
                        inc(i);
                      end;
                  end;
              end;
              inc(i);
            end;
        end;
      result:=j+1;
    end;
  { Convenience overload for #0-terminated UTF-8 input: determines the source
    length with strlen and forwards to the four-argument Utf8ToUnicode.
    Returns 0 when Source is nil. }
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=0;
      if Source<>nil then
        Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
    end;
  { Converts SourceBytes bytes of UTF-8 text at Source to UTF-16.

    Dest         : output buffer; when nil nothing is written and only the
                   required size is computed.
    MaxDestChars : capacity of Dest in UTF-16 code units.
    Source       : UTF-8 input; nil yields 0.
    SourceBytes  : number of input bytes to convert.

    Returns the number of UTF-16 code units stored (or required when Dest is
    nil) plus one. This routine itself does not store a terminating #0.

    Input that does not decode to an accepted code point is replaced by '?'
    (UNICODE_INVALID). A sequence cut short by a non-continuation byte, or by
    the end of the input, is re-interpreted with the shorter length.

    The continuation-byte scan never reads at or beyond Source[SourceBytes];
    a sequence whose last byte would lie exactly there is treated as
    truncated instead. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
    const
      UNICODE_INVALID=63;
    var
      InputUTF8: SizeUInt;
      IBYTE: BYTE;
      OutputUnicode: SizeUInt;
      TempBYTE: BYTE;
      CharLen: SizeUint;
      LookAhead: SizeUInt;
      UC: SizeUInt;
    begin
      if not assigned(Source) then
        begin
          result:=0;
          exit;
        end;
      InputUTF8:=0;
      OutputUnicode:=0;
      if Assigned(Dest) Then
        begin
          while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
            begin
              IBYTE:=byte(Source[InputUTF8]);
              if (IBYTE and $80) = 0 then
                begin
                  //One character US-ASCII, convert it to unicode
                  Dest[OutputUnicode]:=WideChar(IBYTE);
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  //Count the leading 1 bits of the lead byte
                  TempByte:=IBYTE;
                  CharLen:=0;
                  while (TempBYTE and $80)<>0 do
                    begin
                      TempBYTE:=(TempBYTE shl 1) and $FE;
                      inc(CharLen);
                    end;
                  //Test for the "CharLen" conforms UTF-8 string
                  //This means the 10xxxxxx pattern.
                  if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
                    begin
                      //Insufficient chars in string to decode
                      //UTF-8 array. Fallback to single char.
                      CharLen:= 1;
                    end;
                  for LookAhead := 1 to CharLen-1 do
                    begin
                      //A byte at or past SourceBytes is not part of the input
                      //and must not be read; treat it as a failed continuation.
                      if (InputUTF8+LookAhead>=SourceBytes) or
                         ((byte(Source[InputUTF8+LookAhead]) and $C0)<>$80) then
                        begin
                          //Invalid UTF-8 sequence, fallback.
                          CharLen:= LookAhead;
                          break;
                        end;
                    end;
                  UC:=$FFFF;
                  case CharLen of
                    1:  begin
                          //Not valid UTF-8 sequence
                          UC:=UNICODE_INVALID;
                        end;
                    2:  begin
                          //Two bytes UTF, convert it
                          UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                          UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                          if UC <= $7F then
                            begin
                              //Invalid UTF sequence.
                              UC:=UNICODE_INVALID;
                            end;
                        end;
                    3:  begin
                          //Three bytes, convert it to unicode
                          UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                          UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                          UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                          if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                            begin
                              //Invalid UTF-8 sequence
                              UC:= UNICODE_INVALID;
                            end;
                        end;
                    4:  begin
                          //Four bytes, convert it to two unicode characters
                          UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                          UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                          UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                          UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                          if (UC < $10000) or (UC > $10FFFF) then
                            begin
                              UC:= UNICODE_INVALID;
                            end
                          else
                            begin
                              { only store pair if room }
                              dec(UC,$10000);
                              if (OutputUnicode<MaxDestChars-1) then
                                begin
                                  Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                                  inc(OutputUnicode);
                                  UC:=(UC and $3ff) + $DC00;
                                end
                              else
                                begin
                                  InputUTF8:= InputUTF8 + CharLen;
                                  { don't store anything }
                                  CharLen:=0;
                                end;
                            end;
                        end;
                    5,6,7:  begin
                              //Invalid UTF8 to unicode conversion,
                              //mask it as invalid UNICODE too.
                              UC:=UNICODE_INVALID;
                            end;
                  end;
                  if CharLen > 0 then
                    begin
                      Dest[OutputUnicode]:=WideChar(UC);
                      inc(OutputUnicode);
                    end;
                  InputUTF8:= InputUTF8 + CharLen;
                end;
            end;
          Result:=OutputUnicode+1;
        end
      else
        begin
          { size computation only: same decoding decisions, nothing stored }
          while (InputUTF8<SourceBytes) do
            begin
              IBYTE:=byte(Source[InputUTF8]);
              if (IBYTE and $80) = 0 then
                begin
                  //One character US-ASCII, counts as one unicode character
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  //Count the leading 1 bits of the lead byte
                  TempByte:=IBYTE;
                  CharLen:=0;
                  while (TempBYTE and $80)<>0 do
                    begin
                      TempBYTE:=(TempBYTE shl 1) and $FE;
                      inc(CharLen);
                    end;
                  //Test for the "CharLen" conforms UTF-8 string
                  //This means the 10xxxxxx pattern.
                  if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
                    begin
                      //Insufficient chars in string to decode
                      //UTF-8 array. Fallback to single char.
                      CharLen:= 1;
                    end;
                  for LookAhead := 1 to CharLen-1 do
                    begin
                      //A byte at or past SourceBytes is not part of the input
                      //and must not be read; treat it as a failed continuation.
                      if (InputUTF8+LookAhead>=SourceBytes) or
                         ((byte(Source[InputUTF8+LookAhead]) and $C0)<>$80) then
                        begin
                          //Invalid UTF-8 sequence, fallback.
                          CharLen:= LookAhead;
                          break;
                        end;
                    end;
                  UC:=$FFFF;
                  case CharLen of
                    1:  begin
                          //Not valid UTF-8 sequence
                          UC:=UNICODE_INVALID;
                        end;
                    2:  begin
                          //Two bytes UTF, convert it
                          UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                          UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                          if UC <= $7F then
                            begin
                              //Invalid UTF sequence.
                              UC:=UNICODE_INVALID;
                            end;
                        end;
                    3:  begin
                          //Three bytes, convert it to unicode
                          UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                          UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                          UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                          if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                            begin
                              //Invalid UTF-8 sequence
                              UC:= UNICODE_INVALID;
                            end;
                        end;
                    4:  begin
                          //Four bytes, convert it to two unicode characters
                          UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                          UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                          UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                          UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                          if (UC < $10000) or (UC > $10FFFF) then
                            UC:= UNICODE_INVALID
                          else
                            { extra code unit for the surrogate pair }
                            inc(OutputUnicode);
                        end;
                    5,6,7:  begin
                              //Invalid UTF8 to unicode conversion,
                              //mask it as invalid UNICODE too.
                              UC:=UNICODE_INVALID;
                            end;
                  end;
                  if CharLen > 0 then
                    inc(OutputUnicode);
                  InputUTF8:= InputUTF8 + CharLen;
                end;
            end;
          Result:=OutputUnicode+1;
        end;
    end;
  { RawByteString overload: converts s to a UnicodeString and encodes that
    with the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
    var
      wide : UnicodeString;
    begin
      wide:=UnicodeString(s);
      Result:=UTF8Encode(wide);
    end;
  { Encodes s as UTF-8.
    A buffer of three bytes per UTF-16 code unit is allocated, filled by
    UnicodeToUtf8 and then cut down to the reported size minus the counted
    terminator. Returns '' for empty input or when UnicodeToUtf8 reports 0. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
    var
      written : SizeInt;
      buf : UTF8String;
    begin
      result:='';
      if s<>'' then
        begin
          SetLength(buf,length(s)*3);
          written:=UnicodeToUtf8(pchar(buf),length(buf)+1,PUnicodeChar(s),length(s));
          if written>0 then
            begin
              SetLength(buf,written-1);
              result:=buf;
            end;
        end;
    end;
  { Decodes UTF-8 text in s to a UnicodeString.
    A buffer of one UTF-16 code unit per input byte is allocated, filled by
    Utf8ToUnicode and then cut down to the reported size minus one.
    Returns '' for empty input or when Utf8ToUnicode reports 0. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
    var
      produced : SizeInt;
      buf : UnicodeString;
    begin
      result:='';
      if s<>'' then
        begin
          SetLength(buf,length(s));
          produced:=Utf8ToUnicode(PUnicodeChar(buf),length(buf)+1,pchar(s),length(s));
          if produced>0 then
            begin
              SetLength(buf,produced-1);
              result:=buf;
            end;
        end;
    end;
  { Thin wrapper: returns Utf8Encode(s). }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      AnsiToUtf8:=Utf8Encode(s);
    end;
  { Thin wrapper: returns Utf8Decode(s), assigned to a RawByteString. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Utf8ToAnsi:=Utf8Decode(s);
    end;
  { Converts a UTF-16 string to a UCS4String.
    Each code unit or surrogate pair is decoded with utf16toutf32. The result
    holds one element per decoded code point followed by one further element
    for the terminating #0. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
    var
      srcpos,
      srclen,
      outcount : SizeInt;
      units : longint;
    begin
      srclen:=length(s);
      { worst case: one code point per code unit, plus the terminator }
      setlength(result,srclen+1);
      outcount:=0;
      srcpos:=1;
      while srcpos<=srclen do
        begin
          result[outcount]:=utf16toutf32(s,srcpos,units);
          inc(srcpos,units);
          inc(outcount);
        end;
      { outcount <= srclen (surrogate pairs may have been merged);   }
      { outcount+1 keeps room for the terminating #0 (dynamic arrays }
      { are implicitly filled with zero)                             }
      setlength(result,outcount+1);
    end;
  { Stores the UTF-32 code point nc into S at position index as UTF-16 and
    advances index by the number of code units written (1 or 2).
    S *must* be unique when entering; it is grown when index does not leave
    enough room. Values above $10ffff are stored as '?'.
    Code points up to and including $ffff take a single code unit; this
    agrees with the room reserved by the growth check. }
  procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
    var
      p : PUnicodeChar;
    begin
      { if nc > $ffff, we need two places }
      if (index+ord(nc > $ffff)>length(s)) then
        if (length(s) < 10*256) then
          setlength(s,length(s)+10)
        else
          setlength(s,length(s)+length(s) shr 8);
      { we know that s is unique -> avoid uniquestring calls}
      p:=@s[index];
      { <= rather than <: $ffff itself must not enter the surrogate branch,
        where nc-$10000 would underflow and two code units would be written }
      if (nc<=$ffff) then
        begin
          p^:=unicodechar(nc);
          inc(index);
        end
      else if (dword(nc)<=$10ffff) then
        begin
          p^:=unicodechar((nc - $10000) shr 10 + $d800);
          (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
          inc(index,2);
        end
      else
        { invalid code point }
        begin
          p^:='?';
          inc(index);
        end;
    end;
  { Converts a UCS4String to a UTF-16 string.
    The last element of s is taken to be the terminating #0 and is skipped;
    every other element is appended with ConcatUTF32ToUnicodeStr. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
    var
      srcidx,
      lastidx,
      outpos : SizeInt;
    begin
      { skip terminating #0 }
      SetLength(result,length(s)-1);
      outpos:=1;
      lastidx:=high(s)-1;
      srcidx:=0;
      while srcidx<=lastidx do
        begin
          ConcatUTF32ToUnicodeStr(s[srcidx],result,outpos);
          inc(srcidx);
        end;
      { adjust result length (may be too big due to growing }
      { for surrogate pairs)                                }
      setlength(result,outpos-1);
    end;
  { Converts a UTF-16 WideString to a UCS4String.
    Each code unit or surrogate pair is decoded with utf16toutf32. The result
    holds one element per decoded code point followed by one further element
    for the terminating #0. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
    var
      i, slen,
      destindex : SizeInt;
      len : longint;
      us : UnicodeString;
    begin
      { utf16toutf32 takes a UnicodeString. Where WideString is a distinct
        type, passing s directly would convert the whole string again on
        every call, so convert once here. }
      us:=s;
      slen:=length(us);
      setlength(result,slen+1);
      i:=1;
      destindex:=0;
      while (i<=slen) do
        begin
          result[destindex]:=utf16toutf32(us,i,len);
          inc(destindex);
          inc(i,len);
        end;
      { destindex <= slen (surrogate pairs may have been merged) }
      { destindex+1 for terminating #0 (dynamic arrays are       }
      { implicitly filled with zero)                             }
      setlength(result,destindex+1);
    end;
  { Stores the UTF-32 code point nc into S at position index as UTF-16 and
    advances index by the number of code units written (1 or 2).
    S *must* be unique when entering; it is grown when index does not leave
    enough room. Values above $10ffff are stored as '?'.
    Code points up to and including $ffff take a single code unit; this
    agrees with the room reserved by the growth check. }
  procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
    var
      p : PWideChar;
    begin
      { if nc > $ffff, we need two places }
      if (index+ord(nc > $ffff)>length(s)) then
        if (length(s) < 10*256) then
          setlength(s,length(s)+10)
        else
          setlength(s,length(s)+length(s) shr 8);
      { we know that s is unique -> avoid uniquestring calls}
      p:=@s[index];
      { <= rather than <: $ffff itself must not enter the surrogate branch,
        where nc-$10000 would underflow and two code units would be written }
      if (nc<=$ffff) then
        begin
          p^:=widechar(nc);
          inc(index);
        end
      else if (dword(nc)<=$10ffff) then
        begin
          p^:=widechar((nc - $10000) shr 10 + $d800);
          (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
          inc(index,2);
        end
      else
        { invalid code point }
        begin
          p^:='?';
          inc(index);
        end;
    end;
  { Converts a UCS4String to a UTF-16 WideString.
    The last element of s is taken to be the terminating #0 and is skipped;
    every other element is appended with ConcatUTF32ToWideStr. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
    var
      srcidx,
      lastidx,
      outpos : SizeInt;
    begin
      { skip terminating #0 }
      SetLength(result,length(s)-1);
      outpos:=1;
      lastidx:=high(s)-1;
      srcidx:=0;
      while srcidx<=lastidx do
        begin
          ConcatUTF32ToWideStr(s[srcidx],result,outpos);
          inc(srcidx);
        end;
      { adjust result length (may be too big due to growing }
      { for surrogate pairs)                                }
      setlength(result,outpos-1);
    end;
  { Messages written by unimplementedunicodestring. }
  const
    SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';

  { Reports use of a unicode string routine that has no implementation:
    when console I/O is compiled in and IsConsole is set, both messages are
    written to StdErr; in all cases error 233 is then raised through
    HandleErrorFrame with the current frame. }
  procedure unimplementedunicodestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      If IsConsole then
        begin
          Writeln(StdErr,SNoUnicodestrings);
          Writeln(StdErr,SRecompileWithUnicodestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorFrame(233,get_frame);
    end;
  { Returns the ElementSize field of the string header of S, or
    SizeOf(UnicodeChar) when S is empty (nil pointer). }
  function StringElementSize(const S: UnicodeString): Word; overload;
    begin
      Result:=SizeOf(UnicodeChar);
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
    end;
  { Returns the Ref field of the string header of S, or 0 when S is empty
    (nil pointer). }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
    begin
      Result:=0;
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
    end;
  { Returns the code page of S. With FPC_HAS_CPSTRING defined and S not empty
    this is the CodePage field of the string header; in every other case it
    is DefaultUnicodeCodePage. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
    begin
      Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
    end;
  2320. {$warnings off}
  { Placeholder case-conversion routine: only calls
    unimplementedunicodestring and never assigns a result. }
  function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring;
    end;
  { Placeholder comparison routine: only calls unimplementedunicodestring
    and never assigns a result. }
  function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
    begin
      unimplementedunicodestring;
    end;
  { Placeholder text comparison routine: only calls
    unimplementedunicodestring and never assigns a result. }
  function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
    begin
      unimplementedunicodestring;
    end;
  2333. {$warnings on}
  { Fills widestringmanager with the default handlers defined in this file.
    Move and case handlers are only installed when HAS_WIDESTRINGMANAGER is
    not defined. The WideString entries and the two length handlers are only
    installed when FPC_WIDESTRING_EQUAL_UNICODESTRING is defined. }
  procedure initunicodestringmanager;
    begin
      { UnicodeString handlers }
  {$ifndef HAS_WIDESTRINGMANAGER}
      widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
      widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
      widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
      widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
      widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
      widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;

      { WideString handlers share the UnicodeString implementations }
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  {$ifndef HAS_WIDESTRINGMANAGER}
      widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
      widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
      widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
      widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
      widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
      widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
      widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
      widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}

      { always installed }
      widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
    end;