2
0

ustrings.inc 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  13. {$define FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to (S= SizeOf(SizeInt), R= (if CPU64 then SizeOf(Longint) else SizeOf(SizeInt))):
  19. @-S-R : Reference count (R bytes)
  20. @-S : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  Type
    PUnicodeRec = ^TUnicodeRec;
    { Header record of a UnicodeString.  NewUnicodeString places the
      character data UnicodeFirstOff bytes after the start of this record.
      The order of the fields defines the memory layout and must not be
      changed. }
    TUnicodeRec = Record
      CodePage    : TSystemCodePage;
      ElementSize : Word;
  {$if defined(VER3_0) or defined(VER3_2)}
    {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
    {$endif CPU64}
      Ref         : SizeInt;
  {$else}
    {$ifdef CPU64}
      Ref         : Longint;
    {$else}
      Ref         : SizeInt;
    {$endif}
  {$endif}
      Len         : SizeInt;
    end;

  Const
    { Size of the header, i.e. the offset from the start of the allocated
      block to the first character. }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  50. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
  {
    Default UnicodeChar <-> Char conversion.
    UnicodeChar -> Char copies only code units below 256; all others are
    translated to '?'.  Char -> UnicodeChar widens every byte unchanged.
    These routines can be overridden for the current locale.
  }
  56. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  57. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UnicodeChar -> single byte conversion.

    source : first of len UnicodeChars to convert
    dest   : receives len bytes; its code page is set to cp without
             converting the contents
    cp     : code page to tag dest with
    len    : number of UnicodeChars to convert

    Code units below 256 are copied unchanged, every other code unit is
    written as '?'.
  }
  var
    idx  : SizeInt;
    outp : PAnsiChar;
  begin
    SetLength(dest,len);
    { nothing was allocated, so there is nothing to tag or fill }
    if pointer(dest)=nil then
      exit;
    SetCodePage(dest,cp,false);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    for idx:=0 to len-1 do
      if word(source[idx])<256 then
        outp[idx]:=char(word(source[idx]))
      else
        outp[idx]:='?';
  end;
  78. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  79. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  80. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback single byte -> UnicodeChar conversion.

    source : first of len bytes to convert
    cp     : not consulted by this default implementation
    dest   : resized to len characters and filled
    len    : number of bytes to convert

    Every byte value is stored unchanged as a UnicodeChar.
  }
  var
    idx  : SizeInt;
    outp : PUnicodeChar;
  begin
    SetLength(dest,len);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    for idx:=0 to len-1 do
      outp[idx]:=unicodechar(byte(source[idx]));
  end;
  95. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  96. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  { Default character count of a zero-terminated string: simply its length
    as reported by Length(). }
  begin
    Result:=Length(Str);
  end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  { Default code point length: 1 when Str points at a non-#0 character,
    0 when it points at the terminator.  MaxLookAead is not used. }
  begin
    { ord(false)=0, ord(true)=1 }
    Result:=ord(Str^<>#0);
  end;
  108. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  { Maps a standard code page selector to a concrete code page:
    scpFileSystemSingleByte yields DefaultFileSystemCodePage, every other
    selector yields DefaultSystemCodePage. }
  begin
    { don't raise an exception here. We need this for text file handling }
    if stdcp=scpFileSystemSingleByte then
      { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
        without a fully functional widestring manager that will probably
        cause more problems than it solves }
      Result:=DefaultFileSystemCodePage
    else
      Result:=DefaultSystemCodePage;
  end;
  Procedure GetUnicodeStringManager (Out Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=WideStringManager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Out Old: TUnicodeStringManager);
  { Installs New as the string manager and hands the previously installed
    record back in Old. }
  begin
    { save the current manager before it is overwritten }
    Old:=WideStringManager;
    WideStringManager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  { Installs New as the string manager; the previous record is discarded. }
  begin
    WideStringManager:=New;
  end;
  Procedure GetWideStringManager (out Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager.
    Reads the same global as GetUnicodeStringManager. }
  begin
    Manager:=WideStringManager;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Out old: TUnicodeStringManager);
  { Installs New as the string manager and hands the previously installed
    record back in old.  Writes the same global as SetUnicodeStringManager. }
  begin
    { save the current manager before it is overwritten }
    old:=WideStringManager;
    WideStringManager:=New;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  { Installs New as the string manager; the previous record is discarded. }
  begin
    WideStringManager:=New;
  end;
  146. {****************************************************************************
  147. Internal functions, not in interface.
  148. ****************************************************************************}
  procedure UnicodeStringError;
  { Reports run time error 204 for the caller's code address and frame. }
  begin
    HandleErrorAddrFrameInd(204,
                            get_pc_addr,
                            get_frame);
  end;
  153. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  154. {$define FPC_HAS_NEW_UNICODESTRING}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString on the heap with room for Len characters
    plus a terminating #0.
    The header is initialised with length Len, reference count 1,
    DefaultUnicodeCodePage and an element size of SizeOf(UnicodeChar); a #0
    is stored at the first character position.
    Returns a pointer to the first character (just past the header).  If
    GetMem yields nil, UnicodeStringError is called and nil is returned
    should that call return.
  }
  Var
    Block  : Pointer;
    Header : PUnicodeRec;
  begin
    GetMem(Block,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
    if Block=Nil then
      UnicodeStringError
    else
      begin
        Header:=PUnicodeRec(Block);
        Header^.CodePage:=DefaultUnicodeCodePage;
        Header^.ElementSize:=SizeOf(UnicodeChar);
        Header^.Ref:=1;                  { Initial Refcount }
        Header^.Len:=Len;                { Initial length }
        inc(Block,UnicodeFirstOff);      { Points to string now }
        PUnicodeChar(Block)^:=#0;        { Terminating #0 }
      end;
    NewUnicodeString:=Block;
  end;
  177. {$endif FPC_HAS_NEW_UNICODESTRING}
  178. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  179. {$define FPC_HAS_UNICODESTR_DECR_REF}
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Releases one reference to the unicodestring S and sets S to nil.
    Strings with a negative reference count (constants) are left alone;
    otherwise the count is decremented and the block is freed once it
    reaches zero.
  }
  Var
    Header : PUnicodeRec;
  Begin
    { Zero string }
    if S<>Nil then
      begin
        Header:=PUnicodeRec(S-UnicodeFirstOff);
        { the caller's variable is cleared before the count is touched }
        S:=nil;
        { constant strings carry a negative count and are never released }
        if Header^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(Header^.Ref) then
            FreeMem(Header);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  202. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  203. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  204. {$define FPC_HAS_UNICODESTR_INCR_REF}
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S.
    Nil strings and strings with a negative reference count (constants)
    are left untouched.
  }
  Var
    Header : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        Header:=PUnicodeRec(S-UnicodeFirstOff);
        { constant string ? }
        if Header^.Ref>=0 then
          inclocked(Header^.Ref);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  216. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  217. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  218. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString.
    At most high(res) UnicodeChars of S2 are handed to the installed
    Unicode2AnsiMoveProc (code page DefaultSystemCodePage); the converted
    text is then assigned to res.  An empty S2 yields an empty res.
  }
  Var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    { never convert more characters than the destination can hold }
    if CharCount>high(res) then
      CharCount:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),Converted,DefaultSystemCodePage,CharCount);
    res:=Converted;
  end;
  237. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  238. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  239. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    An empty S2 yields an empty result.
  }
  begin
    result:='';
    if Length(S2)>0 then
      widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Length(S2));
  end;
  252. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  253. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  254. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString.
    The target code page is cp (DefaultSystemCodePage when the compiler has
    no code page aware strings), passed through TranslatePlaceholderCP
    before the installed Unicode2AnsiMoveProc is called.
    An empty S2 yields an empty result.
  }
  Var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,CharCount);
  end;
  276. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  277. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  278. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString.
    The source code page is StringCodePage(S2) passed through
    TranslatePlaceholderCP; the conversion itself is done by the installed
    Ansi2UnicodeMoveProc.  An empty S2 yields an empty result.
  }
  Var
    ByteCount : SizeInt;
    SourceCP  : TSystemCodePage;
  begin
    result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    SourceCP:=TranslatePlaceholderCP(StringCodePage(S2));
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SourceCP,result,ByteCount);
  end;
  295. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  296. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  297. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of a UnicodeString into a WideString of the same
    length; no conversion is performed. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  303. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  304. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  305. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of a WideString into a UnicodeString of the same
    length; no conversion is performed. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  311. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  312. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  313. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from a zero-terminated wide character buffer.
    A nil pointer yields an empty string.  The length is the index of the
    first zero word found by IndexWord.
  }
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^,-1,0);
        SetLength(result,CharCount);
        if CharCount>0 then
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
      end;
  end;
  326. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  327. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  328. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero-terminated wide character buffer to an AnsiString.
    The target code page is cp (DefaultSystemCodePage when the compiler has
    no code page aware strings), passed through TranslatePlaceholderCP
    before the installed Wide2AnsiMoveProc is called.
    A nil pointer or an empty buffer yields an empty result.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^,-1,0);
        if CharCount>0 then
          begin
            cp:=TranslatePlaceholderCP(cp);
            widestringmanager.Wide2AnsiMoveProc(P,result,cp,CharCount);
          end;
      end;
  end;
  349. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  350. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  351. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts a zero-terminated wide character buffer to a ShortString.
    The whole buffer is converted with the installed Wide2AnsiMoveProc
    (code page DefaultSystemCodePage) and the converted text is assigned
    to res.  A nil pointer or an empty buffer yields an empty res.
  }
  var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^,high(PtrInt),0);
        if CharCount>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,Converted,DefaultSystemCodePage,CharCount);
            res:=Converted;
          end;
      end;
  end;
  367. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  { Define the symbol the guard above actually tests, as every other
    routine in this file does. }
  {$define FPC_HAS_UNICODESTR_ASSIGN}
  { Historical spelling without HAS_; kept so that code testing the old
    symbol continues to see it. }
  {$define FPC_UNICODESTR_ASSIGN}
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2), taking in account reference counts.

    S1 : destination string variable; its old contents lose one reference
    S2 : source string; gains one reference when it is non-nil and its
         reference count is positive
  }
  begin
    { The source is referenced before the destination is released, so that
      assigning a string to itself cannot free it. }
    If S2<>nil then
      If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
        inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    s1:=s2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  {$endif FPC_HAS_UNICODESTR_ASSIGN}
  386. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  387. {$define FPC_HAS_UNICODESTR_CONCAT}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1+S2 in DestS.
    If either operand is empty, the other one is assigned directly.
    DestS may be the same string as S1, as S2, or as both; each of these
    cases is handled in place after DestS has been resized.
  }
  Var
    Len1,Len2 : SizeInt;
    Tail      : Pointer;
    Aliased   : boolean;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=Length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS already starts with S1: only the S2 part has to be appended }
        Aliased:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        if Aliased then
          { S2 is DestS as well: duplicate the first half out of DestS itself }
          Move(Pointer(DestS)^,Tail^,Len2*sizeof(UnicodeChar))
        else
          { Len2+1 also copies the terminating #0 of S2 }
          Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS holds S2: shift it up to make room, then put S1 in front }
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        Move(Pointer(DestS)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
      end
    else
      begin
        { DestS is unrelated to both operands: start from scratch }
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  430. {$endif FPC_HAS_UNICODESTR_CONCAT}
  431. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  432. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all elements of sarr in DestS.
    When DestS is the first element, the remaining elements are appended
    to it.  When DestS also occurs at a later position, an extra reference
    to the old DestS is held while the result is rebuilt from the first
    element on.
    A one-element array (high(sarr)=0) results in an empty DestS.
  }
  Var
    idx      : Longint;
    first    : longint;
    src,dst  : pointer;
    PartLen  : SizeInt;
    TotalLen : SizeInt;
    KeptLen  : SizeInt;
    Pinned   : pointer;
  begin
    if high(sarr)=0 then
      begin
        DestS:='';
        exit;
      end;
    Pinned:=nil;
    first:=low(sarr);
    { DestS is the first operand: keep its contents and append the rest }
    if Pointer(DestS)=Pointer(sarr[first]) then
      inc(first);
    { Check for another reuse, then we can't use
      the append optimization }
    for idx:=first to high(sarr) do
      if Pointer(DestS)=Pointer(sarr[idx]) then
        begin
          { if DestS is used somewhere in the middle of the expression,
            we need to make sure the original string still exists after
            we empty/modify DestS.
            This trick only works with reference counted strings. Therefore
            this optimization is disabled for WINLIKEUNICODESTRING }
          Pinned:=pointer(DestS);
          fpc_UnicodeStr_Incr_Ref(Pinned);
          first:=low(sarr);
          break;
        end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if first=low(sarr) then
      DestS:='';
    KeptLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    TotalLen:=0;
    for idx:=low(sarr) to high(sarr) do
      inc(TotalLen,length(sarr[idx]));
    SetLength(DestS,TotalLen);
    { Concat all strings, except the string we already
      copied in DestS }
    dst:=Pointer(DestS)+KeptLen*sizeof(UnicodeChar);
    for idx:=first to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        if src<>nil then
          begin
            PartLen:=length(unicodestring(src));
            { PartLen+1 copies the terminating #0 too; the next part
              overwrites it }
            Move(src^,dst^,(PartLen+1)*sizeof(UnicodeChar));
            inc(dst,PartLen*sizeof(UnicodeChar));
          end;
      end;
    { drop the extra reference taken above (no-op when Pinned is nil) }
    fpc_UnicodeStr_Decr_Ref(Pinned);
  end;
  494. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  495. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  496. {$define FPC_HAS_CHAR_TO_UCHAR}
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts a Char to a UnicodeChar using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    Returns the first character of the converted string, or '?' when the
    conversion produced no character at all (mirrors the fallback used by
    fpc_UChar_To_Char).
  }
  var
    w: unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
    { an installed manager may hand back an empty string; indexing w[1]
      would then dereference nil }
    if length(w)>0 then
      fpc_Char_To_UChar:=w[1]
    else
      fpc_Char_To_UChar:='?';
  end;
  504. {$endif FPC_HAS_CHAR_TO_UCHAR}
  505. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  506. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Converts a single Char to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
  }
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,
                                           DefaultSystemCodePage,
                                           fpc_Char_To_UnicodeStr,
                                           1);
  end;
  514. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  515. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  516. {$define FPC_HAS_UCHAR_TO_CHAR}
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage.
    Returns '?' unless the conversion yields exactly one character.
  }
  var
    Converted: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    if length(Converted)<>1 then
      Result:='?'
    else
      Result:=Converted[1];
  end;
  530. {$endif FPC_HAS_UCHAR_TO_CHAR}
  531. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  532. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  {
    Converts a WideChar to a ShortString using the installed
    Wide2AnsiMoveProc and DefaultSystemCodePage.
  }
  var
    Converted: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,Converted,DefaultSystemCodePage,1);
    fpc_UChar_To_ShortStr:=Converted;
  end;
  543. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  544. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  545. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Returns a UnicodeString of length one containing c.
  }
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  554. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  555. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  556. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a single UnicodeChar to an AnsiString.
    The target code page is cp (DefaultSystemCodePage when the compiler has
    no code page aware strings), passed through TranslatePlaceholderCP
    before the installed Unicode2AnsiMoveProc is called.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(@c,
                                           fpc_UChar_To_AnsiStr,
                                           cp,
                                           1);
  end;
  572. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  573. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  574. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a zero-terminated string to a UnicodeString using the
    installed Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty string yields an empty result.
  }
  Var
    ByteCount : SizeInt;
  begin
    { the second test is only reached for a non-nil p }
    if assigned(p) and (p^<>#0) then
      begin
        ByteCount:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,ByteCount);
      end
    else
      fpc_PChar_To_UnicodeStr:='';
  end;
  587. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  588. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  589. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a character array to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    With zerobased set, the array is treated as zero-terminated: conversion
    stops at the first #0 (the whole array is used when there is none), and
    an array starting with #0 yields an empty string.
    Without zerobased the whole array is converted.
  }
  var
    Count      : SizeInt;
    Terminator : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            fpc_CharArray_To_UnicodeStr:='';
            exit;
          end;
        Terminator:=IndexChar(arr,Count,#0);
        if Terminator<>-1 then
          Count:=Terminator;
      end;
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,Count);
  end;
  609. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  610. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  611. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a wide character array into a UnicodeString.
    With zerobased set, copying stops at the first zero element (the whole
    array is used when there is none); otherwise the whole array is copied.
  }
  var
    Count      : SizeInt;
    Terminator : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        Terminator:=IndexWord(arr,Count,0);
        if Terminator<>-1 then
          Count:=Terminator;
      end;
    SetLength(fpc_WideCharArray_To_UnicodeStr,Count);
    Move(arr[0],Pointer(fpc_WideCharArray_To_UnicodeStr)^,Count*sizeof(WideChar));
  end;
  627. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  628. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  629. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  630. { due to their names, the following procedures should be in wstrings.inc,
  631. however, the compiler generates code using this functions on all platforms }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a wide character array to a ShortString using the installed
    Wide2AnsiMoveProc and DefaultSystemCodePage.
    At most high(res) elements are considered.  With zerobased set,
    conversion additionally stops at the first zero element inside that
    range.
  }
  var
    Limit     : longint;
    ZeroAt    : ptrint;
    Count     : byte;
    Converted : ansistring;
  begin
    { clamp the number of elements to what res can hold }
    Limit:=high(arr)+1;
    if Limit>high(res) then
      Limit:=high(res)
    else if Limit<0 then
      Limit:=0;
    Count:=Limit;
    if zerobased then
      begin
        ZeroAt:=IndexWord(arr[0],Limit,0);
        if ZeroAt>=0 then
          Count:=ZeroAt;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),Converted,DefaultSystemCodePage,Count);
    res:=Converted;
  end;
  657. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  658. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  659. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts a widechar array to an AnsiString in code page cp (after
    TranslatePlaceholderCP). Without FPC_HAS_CPSTRING the code page is
    DefaultSystemCodePage. When zerobased is true the source ends at the
    first #0 word. An empty source yields ''.
  }
  var
    count, terminator : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    count:=high(arr)+1;
    if zerobased then
      begin
        terminator:=IndexWord(arr,count,0);
        if terminator<>-1 then
          count:=terminator;
      end;
    if count<=0 then
      fpc_WideCharArray_To_AnsiStr:=''
    else
      begin
        cp:=TranslatePlaceholderCP(cp);
        widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),RawByteString(fpc_WideCharArray_To_AnsiStr),cp,count);
      end;
  end;
  686. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  687. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  688. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies a widechar array into a WideString. When zerobased is true only
    the characters before the first #0 word are copied; otherwise the whole
    array is copied.
  }
  var
    count, terminator : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        terminator:=IndexWord(arr,count,0);
        if terminator<>-1 then
          count:=terminator;
      end;
    SetLength(fpc_WideCharArray_To_WideStr,count);
    Move(arr[0],Pointer(fpc_WideCharArray_To_WideStr)^,count*sizeof(WideChar));
  end;
  704. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  705. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  706. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in res. The converted
    text is truncated to length(res); any remaining elements are zero-filled.
  }
  var
    srclen, copylen, capacity: SizeInt;
    converted: ansistring;
  begin
    srclen:=length(src);
    { only touch src[1] when the string is not nil }
    if srclen>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,DefaultSystemCodePage,srclen);
    capacity:=length(res);
    copylen:=length(converted);
    if copylen>capacity then
      copylen:=capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],copylen);
    fillchar(res[copylen],capacity-copylen,0);
  {$pop}
  end;
  {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  726. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  727. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src from its own code page (placeholders translated with
    TranslatePlaceholderCP) to wide characters and stores them in res. The
    result is truncated to length(res); remaining elements are zero-filled.
  }
  var
    srclen, copylen, capacity: SizeInt;
    wide: widestring;
  begin
    srclen:=length(src);
    { only touch src[1] when the string is not nil }
    if srclen>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),wide,srclen);
    capacity:=length(res);
    copylen:=length(wide);
    if copylen>capacity then
      copylen:=capacity;
  {$push}
  {$r-}
    move(wide[1],res[0],copylen*sizeof(widechar));
    fillchar(res[copylen],(capacity-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  746. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  747. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  748. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to wide characters and stores them
    in res. The result is truncated to length(res); remaining elements are
    zero-filled.
  }
  var
    srclen, copylen, capacity: longint;
    wide : widestring;
  begin
    srclen:=length(src);
    { do not access character 1 of an empty string }
    if srclen>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,wide,srclen);
    capacity:=length(res);
    copylen:=length(wide);
    if copylen>capacity then
      copylen:=capacity;
  {$push}
  {$r-}
    move(wide[1],res[0],copylen*sizeof(widechar));
    fillchar(res[copylen],(capacity-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  767. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  768. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  769. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies src into res without conversion. At most length(res) characters
    are copied; remaining elements are zero-filled.
  }
  var
    copylen, capacity: SizeInt;
  begin
    capacity:=length(res);
    copylen:=length(src);
    if copylen>capacity then
      copylen:=capacity;
  {$push}
  {$r-}
    { src[1] must not be accessed when the string is nil }
    if copylen>0 then
      move(src[1],res[0],copylen*SizeOf(WideChar));
    fillchar(res[copylen],(capacity-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  785. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  786. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  787. {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings:
      <0 if S1<S2
       0 if S1=S2
      >0 if S1>S2
    The common prefix is compared with CompareWord; if it is equal the
    difference of the lengths decides.
  }
  Var
    Len1,Len2,Common : SizeInt;
  begin
    { same string data (or both nil): equal without looking at the contents }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1>Len2 then
      Common:=Len2
    else
      Common:=Len1;
    result:=CompareWord(S1[1],S2[1],Common);
    if result=0 then
      result:=Len1-Len2;
  end;
  813. {$endif FPC_HAS_UNICODESTR_COMPARE}
  814. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  815. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
       0 if S1=S2
     <>0 if S1<>S2
    Strings of different length are reported as -1 without comparing data.
  }
  Var
    Len : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len:=Length(S1);
    if Len=Length(S2) then
      result:=CompareWord(S1[1],S2[1],Len)
    else
      result:=-1;
  end;
  834. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  835. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  836. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  { Raises run-time error 201 unless p is a non-nil string and
    1 <= index <= its stored length (one-based indexing). }
  begin
    { p=nil is tested first so the header is never read through a nil pointer }
    if (p=nil) or (index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  Procedure fpc_UnicodeStr_ZeroBased_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_ZEROBASED_RANGECHECK']; compilerproc;
  { Raises run-time error 201 unless p is a non-nil string and
    0 <= index < its stored length (zero-based indexing). }
  begin
    { p=nil is tested first so the header is never read through a nil pointer }
    if (p=nil) or (index<0) or (index>=PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  847. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  848. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  849. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of string S to L.
    Makes sure S is unique, and contains enough room.

    Three cases are handled for L>0:
      - S is nil: a new string is allocated with NewUnicodeString;
      - S has a reference count of 1: the existing block is resized in place
        when it is too small, or when it is larger than 32 bytes and at most
        half of it would be used;
      - otherwise: a new string is allocated, the retained characters are
        copied and the reference to the old string is dropped.
    For L<=0 the reference held by S is released.
  }
  Var
    Temp : Pointer;
    movelen: SizeInt;
    nl,lens, lena : SizeUInt;
  begin
    nl:=l;
    if (l>0) then
      begin
        if Pointer(S)=nil then
          begin
            { Need a complete new string...}
            Pointer(s):=NewUnicodeString(nl);
          end
        else
          if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
            begin
              { sole owner: work on the memory block that starts at the header }
              Temp:=Pointer(s)-UnicodeFirstOff;
              { lens = size of the current block, lena = size needed for the
                header, L characters and the terminating null }
              lens:=MemSize(Temp);
              lena:=SizeUInt(L*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
              { grow when too small; shrink only blocks above 32 bytes that
                would be at most half used }
              if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
                begin
                  reallocmem(Temp, lena);
                  Pointer(S):=Temp+UnicodeFirstOff;
                end;
            end
          else
            begin
              { Reallocation is needed... }
              Temp:=NewUnicodeString(nL);
              if Length(S)>0 then
                begin
                  { copy min(l, length(s)+1) characters }
                  if l < succ(length(s)) then
                    movelen := l
                  { also move terminating null }
                  else
                    movelen := succ(length(s));
                  Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
                end;
              { drop our reference to the old data before switching to the copy }
              fpc_unicodestr_decr_ref(Pointer(S));
              Pointer(S):=Temp;
            end;
        { Force nil termination in case it gets shorter }
        PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
        { record the new length in the string header }
        PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=nl;
      end
    else { length=0, deallocate the string }
      fpc_unicodestr_decr_ref (Pointer(S));
  end;
  903. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  904. {*****************************************************************************
  905. Public functions, In interface.
  906. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  {
    Returns a UnicodeString holding the #0-terminated text S points to.
    The length is measured directly on the pointer (Length on a PUnicodeChar
    counts the characters before the terminating #0), so no temporary string
    is built just to obtain it. A nil S yields ''.
  }
  begin
    result:=UnicodeCharLenToString(S,Length(S));
  end;
  911. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  912. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  { PUnicodeChar-typed alias: forwards all arguments to StringToWideChar
    and returns its result. }
  begin
    StringToUnicodeChar:=StringToWideChar(Src,Dest,DestSize);
  end;
  917. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  function WideCharToString(S : PWideChar) : UnicodeString;
  {
    Returns a UnicodeString holding the #0-terminated text S points to.
    The length is measured directly on the pointer (Length on a PWideChar
    counts the characters before the terminating #0), so no temporary
    WideString is built just to obtain it. A nil S yields ''.
  }
  begin
    result:=WideCharLenToString(S,Length(S));
  end;
  922. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  923. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (interpreted in its own code page) to wide characters and
    stores them, followed by a terminating #0, in the buffer Dest.
      DestSize : capacity of Dest in wide characters, terminator included;
                 the converted text is truncated to DestSize-1 characters.
    Returns Dest. When DestSize<=0 there is no room even for the terminator,
    so nothing is written (previously this wrote #0 in front of the buffer).
  }
  var
    temp: widestring;
    Len: SizeInt;
  begin
    result:=Dest;
    { a negative Len below would make Dest[Len] address memory before Dest }
    if DestSize<=0 then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    Len:=Length(temp);
    if DestSize<=Len then
      Len:=DestSize-1;
    move(temp[1],Dest^,Len*SizeOf(WideChar));
    Dest[Len]:=#0;
  end;
  937. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  938. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  939. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len characters at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*SizeOf(UnicodeChar));
  end;
  945. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters at Src in Dest
    (procedure form of UnicodeCharLenToString). }
  begin
    Dest := UnicodeCharLenToString(Src, Len);
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Builds a UnicodeString from the first Len characters at Src and stores
    it, converted to AnsiString, in Dest. }
  begin
    Dest := AnsiString(UnicodeCharLenToString(Src, Len));
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Converts the #0-terminated text at S with UnicodeCharToString and stores
    it, converted to AnsiString, in Dest. }
  begin
    Dest := AnsiString(UnicodeCharToString(S));
  end;
  958. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  959. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len characters at S. }
  begin
    SetLength(WideCharLenToString,Len);
    Move(S^,Pointer(WideCharLenToString)^,Len*SizeOf(WideChar));
  end;
  965. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters at Src in Dest
    (procedure form of WideCharLenToString). }
  begin
    Dest := WideCharLenToString(Src, Len);
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Builds a UnicodeString from the first Len characters at Src and stores
    it, converted to AnsiString, in Dest. }
  begin
    Dest := AnsiString(WideCharLenToString(Src, Len));
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Stores the #0-terminated text at S in Dest
    (procedure form of WideCharToString). }
  begin
    Dest := WideCharToString(S);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Converts the #0-terminated text at S with WideCharToString and stores
    it, converted to AnsiString, in Dest. }
  begin
    Dest := AnsiString(WideCharToString(S));
  end;
  { Typed import of the routine exported as 'FPC_UNICODESTR_UNIQUE', so it can
    be called with a UnicodeString variable. }
  Function fpc_unicodestr_Unique_func(Var S : UnicodeString): Pointer; external name 'FPC_UNICODESTR_UNIQUE';

  Procedure UniqueString (Var S : UnicodeString);{$ifdef SYSTEMINLINE}inline;{$endif}
  { Calls the FPC_UNICODESTR_UNIQUE routine on S; the pointer it returns is
    not needed here and is discarded. }
  begin
    fpc_unicodestr_Unique_func(S);
  end;
  987. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  988. {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Ensures the string data S points to has a reference count of 1.
    When the count differs from 1 the data (terminator included) is copied
    into a new string, the old reference is released and S is redirected to
    the copy. Returns the resulting value of S (nil for a nil string).
  }
  Var
    Fresh : Pointer;
    Chars : SizeInt;
  begin
    result:=S;
    if S=nil then
      exit;
    { already the only reference: nothing to do }
    if PUnicodeRec(S-UnicodeFirstOff)^.Ref=1 then
      exit;
    Chars:=PUnicodeRec(S-UnicodeFirstOff)^.len;
    Fresh:=NewUnicodeString(Chars);
    { Chars+1: the terminating null is copied as well }
    Move(PUnicodeChar(S)^,Fresh^,(Chars+1)*sizeof(UnicodeChar));
    PUnicodeRec(Fresh-UnicodeFirstOff)^.len:=Chars;
    fpc_unicodestr_decr_ref(S);
    S:=Fresh;
    result:=Fresh;
  end;
  1012. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  1013. {$ifndef FPC_HAS_UNICODESTR_COPY}
  1014. {$define FPC_HAS_UNICODESTR_COPY}
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns up to Size characters of S starting at the one-based position
    Index. An Index below 1 is treated as 1; Size is clipped to what is
    available after Index. A non-positive resulting size yields ''.
  }
  var
    Fresh : Pointer;
    SrcLen : SizeInt;
  begin
    Fresh:=nil;
    { switch to a zero-based start, never negative }
    dec(Index);
    if Index<0 then
      Index:=0;
    SrcLen:=Length(S);
    { Both tests are required: Size may be maxint, in which case Index+Size
      becomes negative. This also covers a zero-length S. }
    if (Size>SrcLen) or (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        Fresh:=NewUnicodeString(Size);
        Move(PUnicodeChar(S)[Index],Fresh^,Size*sizeof(UnicodeChar));
        PUnicodeChar(Fresh+Size*sizeof(UnicodeChar))^:=#0;
        PUnicodeRec(Fresh-UnicodeFirstOff)^.Len:=Size;
      end;
    { release whatever the result variable referenced before replacing it }
    fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
    Pointer(fpc_unicodestr_Copy):=Fresh;
  end;
  1038. {$endif FPC_HAS_UNICODESTR_COPY}
  1039. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1040. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of Substr in
    Source at or after position Offset, or 0 when there is none, when Substr
    is empty, or when Offset lies outside 1..Length(Source).
  }
  var
    Start,LastStart,SubLen : SizeInt;
    Cursor : punicodechar;
  begin
    Pos:=0;
    SubLen:=Length(SubStr);
    if (SubLen>0) and (Offset>0) and (Offset<=Length(Source)) then
      begin
        { last position at which Substr still fits into Source }
        LastStart:=Length(Source)-SubLen+1;
        Cursor:=@Source[Offset];
        for Start:=Offset to LastStart do
          begin
            { cheap first-character test before the full comparison }
            if (SubStr[1]=Cursor^) and
               (CompareWord(SubStr[1],Cursor^,SubLen)=0) then
              exit(Start);
            inc(Cursor);
          end;
      end;
  end;
  1065. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1066. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1067. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1068. { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of c in s at or
    after position Offset, or 0 when there is none or when Offset lies
    outside 1..length(s).
  }
  var
    Hit: SizeInt;
    Start : punicodechar;
  begin
    Pos:=0;
    if (Offset>0) and (Offset<=length(s)) then
      begin
        Start:=@s[Offset];
        { scan the remaining length(s)-Offset+1 characters }
        Hit:=IndexWord(Start^,length(s)-Offset+1,word(c));
        if Hit>=0 then
          Pos:=Offset+Hit;
      end;
  end;
  1089. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1090. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1091. block, which is significant bloat without any sensible speed improvement. }
  Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts the search text c to UnicodeString and searches it in s from
    Offset on. Deliberately not inline: the managed typecast would add an
    implicit try..finally at every call site. }
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;
  Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts the search text c to UnicodeString and searches it in s from
    Offset on. Deliberately not inline: the managed typecast would add an
    implicit try..finally at every call site. }
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;
  Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  { Converts the searched text s to UnicodeString and looks for c in it from
    Offset on. Deliberately not inline: the managed typecast would add an
    implicit try..finally at every call site. }
  begin
    Pos:=Pos(c,UnicodeString(s),Offset);
  end;
  1104. {$ifndef FPC_HAS_UNICODESTR_OF_CHAR}
  1105. {$define FPC_HAS_UNICODESTR_OF_CHAR}
  Function StringOfChar(c : Unicodechar;l : SizeInt) : UnicodeString;
  { Returns a UnicodeString of length l in which every character is c. }
  begin
    SetLength(result,l);
    { fill by the length actually obtained, not by the requested l }
    FillWord(Pointer(result)^,Length(result),word(c));
  end;
  1111. {$endif}
  1112. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1113. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1114. { Faster version for a char alone. Must be implemented because }
  1115. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1116. { using pos(char,pchar) will always call the shortstring version }
  1117. { (exact match for first argument), also with $h+ (JM) }
  Function Pos (c : Char; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of c (widened to
    a unicodechar) in s at or after position Offset, or 0 when there is none
    or when Offset lies outside 1..Length(s).
  }
  var
    Hit: SizeInt;
    Wide : unicodechar;
    Start : punicodechar;
  begin
    Pos:=0;
    if (Offset>0) and (Offset<=Length(s)) then
      begin
        Wide:=c;
        Start:=@s[Offset];
        { scan the remaining Length(s)-Offset+1 characters }
        Hit:=IndexWord(Start^,Length(s)-Offset+1,word(Wide));
        if Hit>=0 then
          Pos:=Offset+Hit;
      end;
  end;
  1140. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1141. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1142. {$define FPC_HAS_DELETE_UNICODESTR}
  Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes Size characters from S starting at the one-based position Index.
    Does nothing when Index lies outside 1..Length(S) or Size<=0. A Size that
    reaches past the end removes everything from Index on.
  }
  Var
    Total,Tail : SizeInt;
  begin
    Total:=Length(S);
    if (Size<=0) or (Index<=0) or (Index>Total) then
      exit;
    UniqueString (S);
    { number of characters that follow position Index }
    Tail:=Total-Index;
    { compared against Tail because (Size+Index) overflows if Size=MaxInt }
    if Size>Tail then
      Size:=Tail+1
    else
      begin
        Dec(Index);
        { close the gap; the +1 moves the terminating null as well }
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(Total-Index-Size+1)*sizeof(UnicodeChar));
      end;
    Setlength(S,Total-Size);
  end;
  1161. {$endif FPC_HAS_DELETE_UNICODESTR}
  1162. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1163. {$define FPC_HAS_INSERT_UNICODESTR}
  Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the one-based position Index. Index is
    clamped to 1..Length(S)+1, so out-of-range values prepend or append.
    An empty Source leaves S untouched.
  }
  var
    Merged : UnicodeString;
    SrcLen,DstLen,Tail : SizeInt;
  begin
    SrcLen:=Length(Source);
    if SrcLen=0 then
      exit;
    DstLen:=Length(S);
    if Index<=0 then
      Index:=1;
    if Index>DstLen then
      Index:=DstLen+1;
    { from here on Index is the zero-based insertion offset }
    Dec(Index);
    SetLength(Merged,SrcLen+DstLen);
    { head of S, then Source, then the remainder of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],SrcLen*sizeof(UnicodeChar));
    Tail:=DstLen-Index;
    if Tail>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[SrcLen+Index],Tail*sizeof(UnicodeChar));
    S:=Merged;
  end;
  1185. {$endif FPC_HAS_INSERT_UNICODESTR}
  1186. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1187. {$define FPC_HAS_UPCASE_UNICODECHAR}
  Function UpCase(c:UnicodeChar):UnicodeChar;
  {
    Upper-cases a single UnicodeChar. Code units below 128 are handled
    locally ('a'..'z' is shifted by 32, everything else is returned as is);
    all others go through the widestring manager's UpperUnicodeStringProc.
  }
  var
    Code : word;
  begin
    Code:=word(c);
    if Code>=128 then
      Result:=widestringmanager.UpperUnicodeStringProc(UnicodeString(c))[1]
    else if (Code>=Ord('a')) and (Code<=Ord('z')) then
      Result:=UnicodeChar(Code-32)
    else
      Result:=c;
  end;
  1198. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1199. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1200. {$define FPC_HAS_UPCASE_UNICODESTR}
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns s as converted by the widestring manager's
    UpperUnicodeStringProc. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  1205. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1206. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1207. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  {
    Lower-cases a single UnicodeChar. Code units below 128 are handled
    locally ('A'..'Z' is shifted by 32, everything else is returned as is);
    all others go through the widestring manager's LowerUnicodeStringProc.
  }
  var
    Code : word;
  begin
    Code:=word(c);
    if Code>=128 then
      Result:=widestringmanager.LowerUnicodeStringProc(UnicodeString(c))[1]
    else if (Code>=Ord('A')) and (Code<=Ord('Z')) then
      Result:=UnicodeChar(Code+32)
    else
      Result:=c;
  end;
  1218. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1219. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1220. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  function LowerCase(const s : UnicodeString) : UnicodeString;
  { Returns s as converted by the widestring manager's
    LowerUnicodeStringProc. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(s);
  end;
  1225. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1226. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1227. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  { Sets S to length Len and, when Buf is not nil and Len>0, fills it with
    the first Len characters at Buf. With a nil Buf only the length is set. }
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>nil) then
      Move(Buf^,Pointer(S)^,Len*sizeof(UnicodeChar));
  end;
  1234. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1235. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1236. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  { Sets S to the first Len bytes at Buf, converted from
    DefaultSystemCodePage. With a nil Buf or Len<=0 no conversion takes
    place and S is merely given length Len. }
  begin
    if (Buf=nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1244. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1245. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  { Val for floating point values: S is narrowed to a ShortString and parsed
    by the ShortString Val. Strings longer than 255 characters are rejected
    with result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_Real_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_Real_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  1259. {$endif}
  1260. {$ifndef FPC_STR_ENUM_INTERN}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  { Val helper for enumeration types: s is narrowed to a ShortString and
    handed to the ShortString Val. Strings longer than 255 characters are
    rejected with Code=256; the result is then 0, like in the other Val
    helpers of this file (it used to be left unassigned).
    NOTE(review): str2ordindex is not used by this routine - confirm that
    this is intended. }
  var
    ss: ShortString;
  begin
    { defined result for the rejection path }
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=ShortString(s);
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  1273. {$endif FPC_STR_ENUM_INTERN}
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  { Val for Currency values: S is narrowed to a ShortString and parsed by
    the ShortString Val. Strings longer than 255 characters are rejected
    with result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_Currency_UnicodeStr,Code);
      end
    else
      begin
        fpc_Val_Currency_UnicodeStr:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr ({$ifndef VER3_2}DestSize: SizeInt;{$endif VER3_2} Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  { Val for unsigned integers: S is narrowed to a ShortString and parsed by
    the ShortString Val. Strings longer than 255 characters are rejected
    with result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_UInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_UInt_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  { Val for signed integers: S is narrowed to a ShortString and parsed by
    int_Val_SInt_ShortStr, which also receives DestSize. Strings longer than
    255 characters are rejected with result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_SInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        fpc_Val_SInt_UnicodeStr:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      Code:=256;
  end;
  1315. {$ifndef CPU64}
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  { Val for qword values: S is narrowed to a ShortString and parsed by the
    ShortString Val. Strings longer than 255 characters are rejected with
    result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_qword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_qword_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  { Val for Int64 values: S is narrowed to a ShortString and parsed by the
    ShortString Val. Strings longer than 255 characters are rejected with
    result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_int64_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_int64_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  1342. {$endif CPU64}
  1343. {$if defined(CPU16) or defined(CPU8)}
  Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  { Val for longword values: S is narrowed to a ShortString and parsed by
    the ShortString Val. Strings longer than 255 characters are rejected
    with result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_longword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_longword_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  { Val for LongInt values: S is narrowed to a ShortString and parsed by the
    ShortString Val. Strings longer than 255 characters are rejected with
    result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_longint_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_longint_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  { Val for word values: S is narrowed to a ShortString and parsed by the
    ShortString Val. Strings longer than 255 characters are rejected with
    result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_word_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_word_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  { Val for SmallInt values: S is narrowed to a ShortString and parsed by
    the ShortString Val. Strings longer than 255 characters are rejected
    with result 0 and Code=256. }
  Var
    Narrow: ShortString;
  begin
    fpc_Val_smallint_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_smallint_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  1396. {$endif CPU16 or CPU8}
  1397. {$ifndef FPUNONE}
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  { Str for floating point values: formats d into a ShortString with
    str_real (rt is passed on as treal_type) and widens the text into s. }
  var
    Text: shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Text);
    s := UnicodeString(Text);
  end;
  1405. {$endif}
  1406. {$ifndef FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  { Str for enumeration values: lets fpc_shortstr_enum produce the text as
    a ShortString and widens it into s. }
  var
    Text: ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Text);
    s := UnicodeString(Text);
  end;
  1414. {$endif FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  { Str for boolean values: lets fpc_shortstr_bool produce the text as a
    ShortString and widens it into s. }
  var
    Text: ShortString;
  begin
    fpc_shortstr_bool(b,len,Text);
    s := UnicodeString(Text);
  end;
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  { Str for Currency values: formats c with the format specifiers len and fr
    into a ShortString and widens the text into s. }
  var
    Text: shortstring;
  begin
    str(c:len:fr,Text);
    s := UnicodeString(Text);
  end;
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for signed integers: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for unsigned integers: formats v with the format specifier Len into
    a ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  1443. {$ifndef CPU64}
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Int64 values: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Qword values: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  1458. {$endif CPU64}
  1459. {$if defined(CPU16) or defined(CPU8)}
  Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongInt values: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongWord values: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for SmallInt values: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Word values: formats v with the format specifier Len into a
    ShortString and widens the text into S. }
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S := UnicodeString(Text);
  end;
  1488. {$endif CPU16 or CPU8}
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Convenience overload for #0-terminated input: measures Source with
    Length and forwards to the four-argument UnicodeToUtf8. A nil Source
    yields 0. }
  begin
    if Source=nil then
      Result:=0
    else
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source));
  end;
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  {
    Encodes SourceChars UTF-16 code units from Source as UTF-8.
      Dest assigned : at most MaxDestBytes bytes are written to Dest,
                      including a terminating #0; the result is the number
                      of bytes written, terminator included.
      Dest = nil    : nothing is written; the result is the number of bytes
                      the full conversion needs, terminator included.
    Returns 0 when Source is nil, and also when Dest is assigned but
    MaxDestBytes is 0 (no room for the terminator, so Dest is left untouched;
    previously a #0 was written past the end of such a buffer).
    Surrogates that are not part of a valid high/low pair produce no output.
  }
  {$ifdef EXCLUDE_COMPLEX_PROCS}
  begin
    runerror(217);
  end;
  {$else EXCLUDE_COMPLEX_PROCS}
  var
    i,j : SizeUInt;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        { MaxDestBytes-1 below would wrap around for an empty buffer }
        if MaxDestBytes=0 then
          exit;
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            lw:=ord(Source[i]);
            case lw of
              0..$7f:
                begin
                  Dest[j]:=char(lw);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  { stop when the two-byte sequence does not fit }
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (lw shr 6));
                  Dest[j+1]:=char($80 or (lw and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  { stop when the three-byte sequence does not fit }
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (lw shr 12));
                  Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
                  Dest[j+2]:=char($80 or (lw and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  { stop when the four-byte sequence does not fit }
                  if j+3>=MaxDestBytes then
                    break;
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { $d7c0 is ($d800 - ($10000 shr 10)) }
                      lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { keep the last byte of the buffer for the terminator }
        if j>SizeUInt(MaxDestBytes-1) then
          j:=MaxDestBytes-1;
        Dest[j]:=#0;
      end
    else
      begin
        { counting pass: same classification, nothing is stored }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    { +1 accounts for the terminating #0 }
    result:=j+1;
  end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { Convenience overload: takes the byte count of Source from length(Source)
    and forwards to the five-argument Utf8ToUnicode with IgnoreInvalid=True
    and MaxChars as the destination limit. Returns 0 when Source is nil. }
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source<>nil then
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True);
  end;
  { Four-argument form: identical to the five-argument Utf8ToUnicode called
    with IgnoreInvalid=True. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=Utf8ToUnicode(
      Dest,MaxDestChars,
      Source,SourceBytes,
      True);
  end;
  { Decodes SourceBytes bytes of UTF-8 starting at Source into UTF-16 code units.

    Dest <> nil: stores at most MaxDestChars code units in Dest (no terminator
                 is written) and returns the number stored plus one.
    Dest = nil : stores nothing and returns the number of code units the whole
                 input decodes to, plus one.

    Returns 0 when Source is nil.

    A sequence that cannot be decoded yields UNICODE_INVALID ('?'); when
    IgnoreInvalid is False, HandleError(231) is called for it.
    When built with EXCLUDE_COMPLEX_PROCS the routine only raises run-time
    error 217. }
  function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
  {$ifdef EXCLUDE_COMPLEX_PROCS}
  begin
    runerror(217);
  end;
  {$else EXCLUDE_COMPLEX_PROCS}
  const
    UNICODE_INVALID=63;
  var
    InputUTF8: SizeUInt;      { index of the next unread source byte }
    IBYTE: BYTE;
    OutputUnicode: SizeUInt;  { number of code units produced so far }
    TempBYTE: BYTE;
    CharLen: SizeUint;        { length in bytes of the current sequence }
    LookAhead: SizeUInt;
    UC: SizeUInt;
  begin
    if not assigned(Source) then
      begin
        result:=0;
        exit;
      end;
    result:=SizeUInt(-1);
    InputUTF8:=0;
    OutputUnicode:=0;
    if Assigned(Dest) Then
      begin
        while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80) = 0 then
              begin
                // One character US-ASCII, convert it to unicode
                Dest[OutputUnicode]:=WideChar(IBYTE);
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                // The number of leading 1 bits of the lead byte gives the
                // announced sequence length.
                TempByte:=IBYTE;
                CharLen:=0;
                while (TempBYTE and $80)<>0 do
                  begin
                    TempBYTE:=(TempBYTE shl 1) and $FE;
                    inc(CharLen);
                  end;
                // The last byte of the sequence is at index
                // InputUTF8+CharLen-1 and must lie below SourceBytes; ">="
                // keeps the look-ahead from reading Source[SourceBytes].
                if SizeUInt(InputUTF8+CharLen-1)>=SourceBytes then
                  begin
                    //Insufficient bytes left to decode the sequence.
                    //Fallback to single char.
                    CharLen:= 1;
                  end;
                // Every trailing byte must match the 10xxxxxx pattern.
                for LookAhead := 1 to CharLen-1 do
                  begin
                    if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
                       ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
                      begin
                        //Invalid UTF-8 sequence, fallback.
                        CharLen:= LookAhead;
                        break;
                      end;
                  end;
                UC:=$FFFF;
                case CharLen of
                  1:  begin
                        //Not valid UTF-8 sequence
                        UC:=UNICODE_INVALID;
                      end;
                  2:  begin
                        //Two bytes UTF, convert it
                        UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                        UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                        if UC <= $7F then
                          begin
                            //Invalid UTF sequence (overlong).
                            UC:=UNICODE_INVALID;
                          end;
                      end;
                  3:  begin
                        //Three bytes, convert it to unicode
                        UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                        if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                          begin
                            //Invalid UTF-8 sequence
                            UC:= UNICODE_INVALID;
                          end;
                      end;
                  4:  begin
                        //Four bytes, convert it to two unicode characters
                        UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                        if (UC < $10000) or (UC > $10FFFF) then
                          begin
                            UC:= UNICODE_INVALID;
                          end
                        else
                          begin
                            { only store pair if room }
                            dec(UC,$10000);
                            if (OutputUnicode<MaxDestChars-1) then
                              begin
                                { high surrogate now, low surrogate via the
                                  common store below }
                                Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                                inc(OutputUnicode);
                                UC:=(UC and $3ff) + $DC00;
                              end
                            else
                              begin
                                InputUTF8:= InputUTF8 + CharLen;
                                { don't store anything }
                                CharLen:=0;
                              end;
                          end;
                      end;
                  5,6,7,8:
                      begin
                        //Invalid UTF8 to unicode conversion,
                        //mask it as invalid UNICODE too.
                        //(8 = lead byte $FF followed by seven trailing bytes)
                        UC:=UNICODE_INVALID;
                      end;
                end;
                if CharLen > 0 then
                  begin
                    if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
                      HandleError(231); // Will be converted to EConversionError in sysutils
                    Dest[OutputUnicode]:=WideChar(UC);
                    inc(OutputUnicode);
                  end;
                InputUTF8:= InputUTF8 + CharLen;
              end;
          end;
        Result:=OutputUnicode+1;
      end
    else
      begin
        { counting mode: same decoding as above, nothing is stored }
        while (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80) = 0 then
              begin
                // One character US-ASCII
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                TempByte:=IBYTE;
                CharLen:=0;
                while (TempBYTE and $80)<>0 do
                  begin
                    TempBYTE:=(TempBYTE shl 1) and $FE;
                    inc(CharLen);
                  end;
                // See the storing branch: ">=" prevents reading
                // Source[SourceBytes].
                if SizeUInt(InputUTF8+CharLen-1)>=SourceBytes then
                  begin
                    //Insufficient bytes left to decode the sequence.
                    //Fallback to single char.
                    CharLen:= 1;
                  end;
                // Every trailing byte must match the 10xxxxxx pattern.
                for LookAhead := 1 to CharLen-1 do
                  begin
                    if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
                       ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
                      begin
                        //Invalid UTF-8 sequence, fallback.
                        CharLen:= LookAhead;
                        break;
                      end;
                  end;
                UC:=$FFFF;
                case CharLen of
                  1:  begin
                        //Not valid UTF-8 sequence
                        UC:=UNICODE_INVALID;
                      end;
                  2:  begin
                        //Two bytes UTF, convert it
                        UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                        UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                        if UC <= $7F then
                          begin
                            //Invalid UTF sequence (overlong).
                            UC:=UNICODE_INVALID;
                          end;
                      end;
                  3:  begin
                        //Three bytes, convert it to unicode
                        UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                        If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                          begin
                            //Invalid UTF-8 sequence
                            UC:= UNICODE_INVALID;
                          end;
                      end;
                  4:  begin
                        //Four bytes, convert it to two unicode characters
                        UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                        if (UC < $10000) or (UC > $10FFFF) then
                          UC:= UNICODE_INVALID
                        else
                          { extra code unit for the surrogate pair }
                          inc(OutputUnicode);
                      end;
                  5,6,7,8:
                      begin
                        //Invalid UTF8 to unicode conversion,
                        //mask it as invalid UNICODE too.
                        //(8 = lead byte $FF followed by seven trailing bytes)
                        UC:=UNICODE_INVALID;
                      end;
                end;
                if CharLen > 0 then
                  begin
                    if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
                      HandleError(231); // Will be converted to EConversionError in sysutils
                    inc(OutputUnicode);
                  end;
                InputUTF8:= InputUTF8 + CharLen;
              end;
          end;
        Result:=OutputUnicode+1;
      end;
  end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { Converts s to a UnicodeString and returns the UTF-8 encoding produced by
    the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeString(s);
    Result:=UTF8Encode(Wide);
  end;
  {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  { Returns the UTF-8 encoding of s; an empty s gives an empty result. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
  var
    Written : SizeInt;
    Buf : UTF8String;
  begin
    Result:='';
    if s<>'' then
      begin
        { reserve three bytes per UTF-16 code unit; the extra +1 passed below
          covers the terminator UnicodeToUtf8 appends }
        SetLength(Buf,Length(s)*3);
        Written:=UnicodeToUtf8(PChar(Buf),Length(Buf)+1,PUnicodeChar(s),Length(s));
        if Written>0 then
          begin
            { the returned count includes the terminator }
            SetLength(Buf,Written-1);
            Result:=Buf;
          end;
      end;
  end;
  {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Interprets the bytes of s as UTF-8 and returns them as a UnicodeString;
    an empty s gives an empty result. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
  var
    Produced : SizeInt;
    Buf : UnicodeString;
  begin
    Result:='';
    if s<>'' then
      begin
        { one code unit per source byte is reserved for the output }
        SetLength(Buf,Length(s));
        Produced:=Utf8ToUnicode(PUnicodeChar(Buf),Length(Buf)+1,PChar(s),Length(s));
        if Produced>0 then
          begin
            { the returned count is one more than the code units stored }
            SetLength(Buf,Produced-1);
            Result:=Buf;
          end;
      end;
  end;
  {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Returns s encoded as UTF-8; simply the result of Utf8Encode(s). }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    AnsiToUtf8:=Utf8Encode(s);
  end;
  { Decodes s as UTF-8 with Utf8Decode and converts the resulting
    UnicodeString back to a RawByteString. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Utf8ToAnsi:=RawByteString(Utf8Decode(s));
  end;
  1895. {$ifdef FPC_HAS_FEATURE_DYNARRAYS}
  { Converts len UTF-16 code units starting at p into the UCS4 string res.
    A valid high/low surrogate pair becomes one code point; any other code
    unit, including an unpaired surrogate, is copied unchanged. res receives
    one extra element holding a terminating 0. }
  procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
  var
    srcpos, dstpos: sizeint;
    cu: longint;
  begin
    { first pass: count the code points }
    dstpos:=0;
    srcpos:=0;
    while srcpos<len do
      begin
        if (p[srcpos]>=#$d800) and (p[srcpos]<=#$dbff) and
           (srcpos+1<len) and
           (p[srcpos+1]>=#$dc00) and (p[srcpos+1]<=#$dfff) then
          inc(srcpos,2)   { valid surrogate pair }
        else
          inc(srcpos);    { plain code unit or unpaired surrogate }
        inc(dstpos);
      end;
    SetLength(res,dstpos+1); { +1 for null termination }

    { second pass: store the code points }
    dstpos:=0;
    srcpos:=0;
    while srcpos<len do
      begin
        cu:=ord(p[srcpos]);
        if (cu>=$d800) and (cu<=$dbff) and
           (srcpos+1<len) and
           (p[srcpos+1]>=#$dc00) and (p[srcpos+1]<=#$dfff) then
          begin
            { $d7c0 = $d800 - ($10000 shr 10) }
            res[dstpos]:=(UCS4Char(cu-$d7c0) shl 10)+(UCS4Char(p[srcpos+1]) xor $dc00);
            inc(srcpos);
          end
        else
          res[dstpos]:=cu;
        inc(srcpos);
        inc(dstpos);
      end;
    res[dstpos]:=0;
  end;
  {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  { Returns the UCS4 form of s as produced by UCS4Encode. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  begin
    UCS4Encode(
      PWideChar(s),
      Length(s),
      result);
  end;
  {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  { Returns the UCS4 form of s as produced by UCS4Encode. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
  begin
    UCS4Encode(
      PWideChar(s),
      Length(s),
      result);
  end;
  {$endif FPC_HAS_WIDESTR_TO_WIDESTR_TO_UCS4STRING}
  1955. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1956. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  { Writes the UTF-16 form of s to dest. dest must point to a previously
    allocated wide/unicodestring that is large enough. The last element of s
    (the explicit terminating #0) is not converted. Code points above $10ffff
    are written as '?'. }
  procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
  var
    idx: sizeint;
    cp: UCS4Char;
  begin
    for idx:=0 to length(s)-2 do  { -2 because s contains explicit terminating #0 }
      begin
        cp:=s[idx];
        if (cp<=$ffff) then
          begin
            { fits in a single code unit }
            dest^:=widechar(cp);
            inc(dest);
          end
        else if (dword(cp)<=$10ffff) then
          begin
            { surrogate pair; $d7c0 folds the -$10000 into the high half,
              and subtracting $10000 doesn't change the low 10 bits }
            dest[0]:=widechar(cp shr 10 + $d7c0);
            dest[1]:=widechar(cp and $3ff + $dc00);
            inc(dest,2);
          end
        else
          begin
            { invalid code point }
            dest^:='?';
            inc(dest);
          end;
      end;
  end;
  { Converts s to a UnicodeString: the result is sized first (two code units
    for every code point in $10000..$10ffff, one for everything else) and then
    filled by UCS4Decode. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  var
    idx : SizeInt;
    units : SizeInt;
  begin
    units:=0;
    for idx:=0 to length(s)-2 do  { skip terminating #0 }
      if (s[idx]>$ffff) and (cardinal(s[idx])<=$10ffff) then
        inc(units,2)
      else
        inc(units);
    SetLength(result,units);
    UCS4Decode(s,pointer(result));
  end;
  { Converts s to a WideString: the result is sized first (two code units for
    every code point in $10000..$10ffff, one for everything else) and then
    filled by UCS4Decode. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
  var
    idx : SizeInt;
    units : SizeInt;
  begin
    units:=0;
    for idx:=0 to length(s)-2 do  { skip terminating #0 }
      if (s[idx]>$ffff) and (cardinal(s[idx])<=$10ffff) then
        inc(units,2)
      else
        inc(units);
    SetLength(result,units);
    UCS4Decode(s,pointer(result));
  end;
  2002. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  2003. {$endif FPC_HAS_FEATURE_DYNARRAYS}
  2004. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  { Diagnostic texts that unimplementedunicodestring writes to StdErr. }
  const
    SNoUnicodestrings =
      'This binary has no string conversion support compiled in.';
    SRecompileWithUnicodestrings =
      'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';
  { Reports a missing unicodestring manager: for console programs built with
    console I/O and without HAS_WIDESTRINGMANAGER the two explanatory
    messages go to StdErr first; in every configuration run-time error 234 is
    then raised with this routine's address and frame. }
  procedure unimplementedunicodestring;
  begin
  {$if defined(FPC_HAS_FEATURE_CONSOLEIO) and not defined(HAS_WIDESTRINGMANAGER)}
    if IsConsole then
      begin
        Writeln(StdErr,SNoUnicodestrings);
        Writeln(StdErr,SRecompileWithUnicodestrings);
      end;
  {$endif}
    HandleErrorAddrFrameInd(234{RuntimeErrorExitCodes[reCodesetConversion]},get_pc_addr,get_frame);
  end;
  { Returns the ElementSize field of the string's header record, or
    SizeOf(UnicodeChar) for an empty (nil) string. }
  function StringElementSize(const S: UnicodeString): Word; overload;
  begin
    if Pointer(S)=nil then
      Result:=SizeOf(UnicodeChar)
    else
      { the header record sits UnicodeFirstOff bytes before the data }
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
  end;
  { Returns the Ref field of the string's header record, or 0 for an empty
    (nil) string. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
  begin
    if Pointer(S)=nil then
      Result:=0
    else
      { the header record sits UnicodeFirstOff bytes before the data }
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
  end;
  { Returns DefaultUnicodeCodePage, except that with FPC_HAS_CPSTRING a
    non-empty string reports the CodePage field of its header record. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  begin
    Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
    if Pointer(S)<>nil then
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
  end;
  2044. {$push}
  2045. {$warnings off}
  { Placeholder for the upper/lower-case UnicodeString hooks: only calls
    unimplementedunicodestring. Result is never assigned here. }
  function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
  begin
    unimplementedunicodestring;
  end;
  { Placeholder for the UnicodeString comparison hook: only calls
    unimplementedunicodestring. Result is never assigned here. }
  function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  begin
    unimplementedunicodestring;
  end;
  { Placeholder for the upper/lower-case WideString hooks: only calls
    unimplementedunicodestring. Result is never assigned here. }
  function StubWideCase(const s: WideString): WideString;
  begin
    unimplementedunicodestring;
  end;
  { Placeholder for the WideString comparison hook: only calls
    unimplementedunicodestring. Result is never assigned here. }
  function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  begin
    unimplementedunicodestring;
  end;
  2062. {$pop}
  { Fills widestringmanager with the built-in defaults. Case conversion and
    comparison hooks point at stubs that raise a run-time error. The entries
    inside the HAS_WIDESTRINGMANAGER guard are only set when that symbol is
    not defined. }
  procedure initunicodestringmanager;
  begin
  {$ifndef HAS_WIDESTRINGMANAGER}
    { ansi <-> wide conversion }
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
    widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
    widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
    widestringmanager.Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;

    { ansi <-> unicode conversion }
    widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
    widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;

    { case conversion is not available without a real manager }
    widestringmanager.UpperWideStringProc:=@StubWideCase;
    widestringmanager.LowerWideStringProc:=@StubWideCase;
    widestringmanager.UpperUnicodeStringProc:=@StubUnicodeCase;
    widestringmanager.LowerUnicodeStringProc:=@StubUnicodeCase;

    widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
  {$endif HAS_WIDESTRINGMANAGER}

    { comparison is not available without a real manager }
    widestringmanager.CompareWideStringProc:=@StubCompareWideString;
    // widestringmanager.CompareTextWideStringProc:=@StubCompareWideString;
    widestringmanager.CompareUnicodeStringProc:=@StubCompareUnicodeString;

    widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
    widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  end;
  2086. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  { Converts Str to DefaultFileSystemCodePage through the installed
    widestringmanager.Unicode2AnsiMoveProc. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
  var
    Units : SizeInt;
  Begin
    Units:=Length(Str);
    widestringmanager.Unicode2AnsiMoveProc(
      punicodechar(Str),
      Result,
      DefaultFileSystemCodePage,
      Units);
  End;
  {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Converts the text held in arr to DefaultFileSystemCodePage through the
    installed widestringmanager.Unicode2AnsiMoveProc.

    The text ends at the first #0 element or, when arr contains none, at the
    end of the array; elements beyond the array are never inspected. An empty
    array gives an empty result. }
  Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
  var
    Units : SizeInt;
  Begin
    { nothing to address in an empty open array }
    if High(arr)<0 then
      begin
        Result:='';
        exit;
      end;
    { bounded search for the terminator }
    Units:=0;
    while (Units<=High(arr)) and (arr[Units]<>#0) do
      inc(Units);
    widestringmanager.Unicode2AnsiMoveProc(@arr[0],Result,
      DefaultFileSystemCodePage,Units);
  End;
  {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Returns a copy of Str whose code page is changed to
    DefaultFileSystemCodePage via SetCodePage with its third argument True. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
  Begin
    Result:=Str;
    SetCodePage(
      Result,
      DefaultFileSystemCodePage,
      True);
  End;
  { Delphi compatibility: the bytes of S are always interpreted as UTF-8,
    whatever code page the string carries. Equivalent to UTF8Decode(S). }
  function UTF8ToString(const S: RawByteString): UnicodeString; inline;
  begin
    UTF8ToString := UTF8Decode(S);
  end;
  { ShortString form: S is first assigned to a RawByteString, which is then
    decoded with UTF8Decode. }
  function UTF8ToString(const S: ShortString): UnicodeString;
  var
    Bytes: RawByteString;
  begin
    Bytes := S;
    UTF8ToString := UTF8Decode(Bytes);
  end;
  { PAnsiChar form: copies the length(S) bytes that S points to into a
    RawByteString and decodes that with the RawByteString overload.

    The byte count is kept in a SizeInt (the type the copy helper takes) so
    that long inputs are not truncated by a narrower integer type. }
  function UTF8ToString(const S: PAnsiChar): UnicodeString;
  var
    rs: RawByteString;
    Count: SizeInt;
  begin
    Count := length(S);
    SetLength(rs, Count);
    { nothing to copy for a nil or empty source }
    if Count > 0 then
      fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
    Result := UTF8ToString(rs);
  end;
  2132. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2133. are as well }
  2134. {$ifndef CPUJVM}
  { Open-array-of-AnsiChar form: copies all Length(S) elements into a
    RawByteString and decodes that with the RawByteString overload.

    The element count is kept in a SizeInt (the type the copy helper takes) so
    that large arrays are not truncated by a narrower integer type. }
  function UTF8ToString(const S: array of AnsiChar): UnicodeString;
  var
    rs: RawByteString;
    Count: SizeInt;
  begin
    Count := Length(S);
    SetLength(rs, Count);
    { nothing to copy for an empty array }
    if Count > 0 then
      fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
    Result := UTF8ToString(rs);
  end;
  { Open-array-of-Byte form: copies all Length(S) elements into a
    RawByteString and decodes that with the RawByteString overload.

    The element count is kept in a SizeInt (the type the copy helper takes) so
    that large arrays are not truncated by a narrower integer type. }
  function UTF8ToString(const S: array of Byte): UnicodeString;
  var
    rs: RawByteString;
    Count: SizeInt;
  begin
    Count := Length(S);
    SetLength(rs, Count);
    { nothing to copy for an empty array }
    if Count > 0 then
      fpc_pchar_ansistr_intern_charmove(pchar(@S),Low(S),rs,0,Count);
    Result := UTF8ToString(rs);
  end;
  2157. {$endif not CPUJVM}