ustrings.inc 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  {$define FPC_UNICODESTRING_TYPE_DEFINED}
  {
    Implementation of the UnicodeString type and everything it needs.

    A UnicodeString is a 'silent' PUnicodeChar: the variable holds a pointer
    to the first character and a TUnicodeRec header sits directly in front of
    that address.  With S = SizeOf(SizeInt) and
    R = SizeOf(Longint) on CPU64, SizeOf(SizeInt) otherwise:

      @-S-R : reference count (R bytes)
      @-S   : SizeInt holding the size as a number of chars; multiply by
              SizeOf(UnicodeChar) to get the number of bytes.  This is
              compatible with Delphi.
      @     : the string data followed by a terminating #0

    PUnicodeChar(UnicodeString) is therefore a valid typecast, and WS[i] is
    converted to the address @WS+i-1.

    Constants should be assigned a reference count of -1, meaning that they
    cannot be disposed of.
  }
  type
    PUnicodeRec = ^TUnicodeRec;
    TUnicodeRec = record
      CodePage    : TSystemCodePage;
      ElementSize : Word;
  {$if not defined(VER3_0) and not defined(VER3_2)}
    {$ifdef CPU64}
      Ref         : Longint;
    {$else}
      Ref         : SizeInt;
    {$endif}
  {$else}
    {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
    {$endif CPU64}
      Ref         : SizeInt;
  {$endif}
      Len         : SizeInt;
    end;

  const
    { Size of the header record that precedes the character data. }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  {$endif FPC_UNICODESTRING_TYPE_DEFINED}
  {
    Default UnicodeChar <-> AnsiChar conversion: code units below 256 are
    copied one-to-one, all other UnicodeChars are translated to '?'.
    These routines can be overridden for the current locale.
  }
  {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  { Fallback UnicodeChar -> AnsiChar conversion.
    Resizes dest to len bytes, calls SetCodePage(dest,cp,false) and then copies
    len code units from source: values below 256 are stored as the byte with
    the same value, everything else is stored as '?'. }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  var
    remaining : SizeInt;
    outp : PAnsiChar;
  begin
    SetLength(dest,len);
    { nothing to fill in when the resized string is empty }
    if pointer(dest)=nil then
      exit;
    SetCodePage(dest,cp,false);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        if word(source^)>=256 then
          outp^:='?'
        else
          outp^:=AnsiChar(word(source^));
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  { Fallback AnsiChar -> UnicodeChar conversion.
    Resizes dest to len characters and widens each of the len source bytes to
    the UnicodeChar with the same numeric value.  The cp argument is not used. }
  procedure DefaultAnsi2UnicodeMove(source:pansichar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  var
    remaining : SizeInt;
    outp : PUnicodeChar;
  begin
    SetLength(dest,len);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=UnicodeChar(byte(source^));
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  { Default character-length routine: returns Length(Str). }
  function DefaultCharLengthPChar(const Str: PAnsiChar): PtrInt;
  begin
    Result:=Length(Str);
  end;

  { Default code point length: 0 when Str starts with #0, otherwise 1.
    MaxLookAead is ignored. }
  function DefaultCodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
  begin
    if Str[0]=#0 then
      Result:=0
    else
      Result:=1;
  end;
  {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  { Default mapping from a standard code page kind to a concrete code page:
    scpFileSystemSingleByte yields DefaultFileSystemCodePage, every other value
    yields DefaultSystemCodePage. }
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  begin
    { don't raise an exception here. We need this for text file handling }
    if stdcp=scpFileSystemSingleByte then
      { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
        without a fully functional widestring manager that will probably cause
        more problems than it solves }
      Result:=DefaultFileSystemCodePage
    else
      Result:=DefaultSystemCodePage;
  end;
  { Copies the currently installed string manager into Manager. }
  procedure GetUnicodeStringManager (out Manager : TUnicodeStringManager);
  begin
    Manager:=widestringmanager;
  end;

  { Installs New as the string manager and hands back the previous one in Old.
    The previous manager is saved before New is installed. }
  procedure SetUnicodeStringManager (const New : TUnicodeStringManager; out Old: TUnicodeStringManager);
  begin
    Old:=widestringmanager;
    widestringmanager:=New;
  end;

  { Installs New as the string manager; the previous one is discarded. }
  procedure SetUnicodeStringManager (const New : TUnicodeStringManager);
  begin
    widestringmanager:=New;
  end;
  { Copies the currently installed string manager into Manager.
    Operates on the same widestringmanager variable as GetUnicodeStringManager. }
  procedure GetWideStringManager (out Manager : TUnicodeStringManager);
  begin
    Manager:=widestringmanager;
  end;

  { Installs New as the string manager and hands back the previous one in old.
    The previous manager is saved before New is installed. }
  procedure SetWideStringManager (const New : TUnicodeStringManager; out old: TUnicodeStringManager);
  begin
    old:=widestringmanager;
    widestringmanager:=New;
  end;

  { Installs New as the string manager; the previous one is discarded. }
  procedure SetWideStringManager (const New : TUnicodeStringManager);
  begin
    widestringmanager:=New;
  end;
  146. {****************************************************************************
  147. Internal functions, not in interface.
  148. ****************************************************************************}
  { Reports a UnicodeString failure: hands error code 204 together with the
    current code address and stack frame to HandleErrorAddrFrameInd. }
  procedure UnicodeStringError;
  begin
    HandleErrorAddrFrameInd(204,get_pc_addr,get_frame);
  end;
  {$ifndef FPC_HAS_NEW_UNICODESTRING}
  {$define FPC_HAS_NEW_UNICODESTRING}
  { Allocates a new UnicodeString on the heap with room for Len characters plus
    the terminating #0, and returns a pointer to its first character.
    The header is filled in with length Len, reference count 1, code page
    DefaultUnicodeCodePage and element size SizeOf(UnicodeChar).  Only the first
    character is written (as #0); the remaining characters are not initialized
    here.  UnicodeStringError is called when GetMem returns nil. }
  function NewUnicodeString(Len : SizeInt) : Pointer;
  var
    hdr : PUnicodeRec;
  begin
    hdr:=GetMem(Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
    if hdr=nil then
      UnicodeStringError;
    hdr^.Len:=Len;                            { initial length }
    hdr^.Ref:=1;                              { initial reference count }
    hdr^.CodePage:=DefaultUnicodeCodePage;
    hdr^.ElementSize:=SizeOf(UnicodeChar);
    { step over the header so the result points at the string data }
    Result:=Pointer(hdr)+UnicodeFirstOff;
    PUnicodeChar(Result)^:=#0;                { terminating #0 }
  end;
  {$endif FPC_HAS_NEW_UNICODESTRING}
  {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  {$define FPC_HAS_UNICODESTR_DECR_REF}
  { Releases one reference to the unicodestring S and sets S to nil.
    A nil S is left alone.  Strings with a negative reference count (constants)
    are never touched; for all others the count is decremented and the memory
    block is freed once it reaches zero. }
  procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  var
    hdr : PUnicodeRec;
  begin
    { empty string: nothing to release }
    if not Assigned(S) then
      exit;
    hdr:=PUnicodeRec(S-UnicodeFirstOff);
    { the variable is cleared in every case, constants included }
    S:=nil;
    { a negative count marks a constant string }
    if hdr^.Ref>=0 then
      { declocked does a MT safe dec and returns true, if the counter is 0 }
      if declocked(hdr^.Ref) then
        FreeMem(hdr);
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  {$endif FPC_HAS_UNICODESTR_DECR_REF}
  {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  {$define FPC_HAS_UNICODESTR_INCR_REF}
  { Adds one reference to the unicodestring S.
    Nothing happens for a nil S or for a string whose reference count is
    negative (constant string). }
  procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  var
    hdr : PUnicodeRec;
  begin
    if Assigned(S) then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { constant strings keep their negative count }
        if hdr^.Ref>=0 then
          inclocked(hdr^.Ref);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  {$endif FPC_HAS_UNICODESTR_INCR_REF}
  {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  { Converts the UnicodeString S2 to the ShortString res.
    At most high(res) source characters are handed to the string manager's
    Unicode2AnsiMoveProc, using DefaultSystemCodePage; the converted text is
    then assigned to res.  An empty S2 gives an empty res. }
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  var
    count : SizeInt;
    converted : ansistring;
  begin
    res:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    { never convert more characters than res can hold }
    if count>high(res) then
      count:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),converted,DefaultSystemCodePage,count);
    res:=converted;
  end;
  {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  { Converts the ShortString S2 to a UnicodeString through the string manager's
    Ansi2UnicodeMoveProc, using DefaultSystemCodePage.  An empty S2 gives ''. }
  function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  var
    count : SizeInt;
  begin
    Result:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(@S2[1]),DefaultSystemCodePage,Result,count);
  end;
  {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  { Converts the UnicodeString S2 to an AnsiString.
    The target code page is cp (DefaultSystemCodePage when FPC_HAS_CPSTRING is
    not defined), passed through TranslatePlaceholderCP before it is handed to
    the string manager's Unicode2AnsiMoveProc.  An empty S2 gives ''. }
  function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  var
    count : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),Result,cp,count);
  end;
  {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  { Converts the AnsiString S2 to a UnicodeString.
    The source code page is StringCodePage(S2) passed through
    TranslatePlaceholderCP; the conversion itself is done by the string
    manager's Ansi2UnicodeMoveProc.  An empty S2 gives ''. }
  function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  var
    count : SizeInt;
    srccp : TSystemCodePage;
  begin
    Result:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    srccp:=TranslatePlaceholderCP(StringCodePage(S2));
    widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(S2),srccp,Result,count);
  end;
  {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  { Converts the UnicodeString S2 to a WideString by resizing the result to the
    same length and copying the characters unchanged. }
  function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  var
    count : SizeInt;
  begin
    count:=Length(S2);
    SetLength(Result,count);
    Move(Pointer(S2)^,Pointer(Result)^,count*sizeof(WideChar));
  end;
  {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  { Converts the WideString S2 to a UnicodeString by resizing the result to the
    same length and copying the characters unchanged. }
  function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  var
    count : SizeInt;
  begin
    count:=Length(S2);
    SetLength(Result,count);
    Move(Pointer(S2)^,Pointer(Result)^,count*sizeof(WideChar));
  end;
  {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  { Builds a UnicodeString from the zero-terminated wide string p.
    A nil p gives ''.  The length is the position of the first zero word as
    reported by IndexWord; the characters are copied unchanged. }
  function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  var
    count : SizeInt;
  begin
    Result:='';
    if Assigned(p) then
      begin
        count:=IndexWord(p^, -1, 0);
        SetLength(Result,count);
        if count>0 then
          Move(p^,PUnicodeChar(Pointer(Result))^,count*sizeof(UnicodeChar));
      end;
  end;
  {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  { Converts the zero-terminated wide string p to an AnsiString.
    A nil or empty p gives ''.  The target code page is cp
    (DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined), passed through
    TranslatePlaceholderCP; the conversion is done by the string manager's
    Wide2AnsiMoveProc. }
  function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  var
    count : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    if not Assigned(p) then
      exit;
    { position of the terminating zero word = number of characters }
    count:=IndexWord(p^, -1, 0);
    if count<=0 then
      exit;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Wide2AnsiMoveProc(p,Result,cp,count);
  end;
  {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  { Converts the zero-terminated wide string p to the ShortString res.
    A nil or empty p gives an empty res.  The characters up to the first zero
    word are converted with the string manager's Wide2AnsiMoveProc, using
    DefaultSystemCodePage, and the converted text is assigned to res. }
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  var
    count : SizeInt;
    converted : ansistring;
  begin
    res:='';
    if not Assigned(p) then
      exit;
    count:=IndexWord(p^, high(PtrInt), 0);
    if count<=0 then
      exit;
    widestringmanager.Wide2AnsiMoveProc(p,converted,DefaultSystemCodePage,count);
    res:=converted;
  end;
  {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  { Define the symbol that the guard above actually tests, so the guard works
    like the other FPC_HAS_xxx guards in this file. }
  {$define FPC_HAS_UNICODESTR_ASSIGN}
  { Historical spelling, kept defined for anything that still tests it. }
  {$define FPC_UNICODESTR_ASSIGN}
  { checked against the ansistring routine, 2001-05-27 (FK) }
  { Assigns S2 to S1 (S1:=S2), taking reference counts into account.
    The count of S2 is raised first (only when S2 is not nil and its count is
    positive), then the old S1 is released, so assigning a string to itself
    cannot free it. }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  begin
    If S2<>nil then
      If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
        inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    s1:=s2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  {$endif FPC_HAS_UNICODESTR_ASSIGN}
  {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  {$define FPC_HAS_UNICODESTR_CONCAT}
  { Stores S1+S2 in DestS.
    When either operand is empty the other one is simply assigned.  Otherwise
    the memory block of DestS is reallocated in place when DestS is assigned
    and has a reference count of exactly 1; in every other case a new string is
    allocated and the old DestS is released.  DestS may be the same string as
    S1 and/or S2. }
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  var
    LeftLen,RightLen,TotalLen : SizeInt;
    PrevP,OutP,BlockP,RightSrc : Pointer;
  begin
    { only assign if s1 or s2 is empty }
    if Length(S1)=0 then
      begin
        DestS:=s2;
        exit;
      end;
    if Length(S2)=0 then
      begin
        DestS:=s1;
        exit;
      end;

    LeftLen:=PUnicodeRec(Pointer(S1)-UnicodeFirstOff)^.Len;
    RightLen:=PUnicodeRec(Pointer(S2)-UnicodeFirstOff)^.Len;
    TotalLen:=LeftLen+RightLen;
    PrevP:=Pointer(DestS);

    if (PrevP=nil) or (PUnicodeRec(PrevP-UnicodeFirstOff)^.Ref<>1) then
      begin
        { DestS is empty or not exclusively owned: build a fresh string }
        OutP:=NewUnicodeString(TotalLen);
        Move(Pointer(S1)^,OutP^,LeftLen*sizeof(UnicodeChar));
        Move(Pointer(S2)^,PUnicodeChar(OutP)[LeftLen],RightLen*sizeof(UnicodeChar));
        fpc_unicodestr_decr_ref(Pointer(DestS));
      end
    else
      begin
        { Reallocate when possible; in the hope this will reuse the chunk more
          often than do a redundant copy. }
        BlockP:=PrevP-UnicodeFirstOff;
        OutP:=ReallocMem(BlockP,UnicodeFirstOff+sizeof(UnicodeChar)+TotalLen*sizeof(UnicodeChar))+UnicodeFirstOff;
        { S2 goes first: when S2 is the old DestS its characters now sit at the
          start of the reallocated block and must be moved before S1 overwrites
          them.  In the other cases the order does not matter. }
        if Pointer(S2)=PrevP then
          RightSrc:=OutP
        else
          RightSrc:=Pointer(S2);
        Move(RightSrc^,PUnicodeChar(OutP)[LeftLen],RightLen*sizeof(UnicodeChar));
        { when S1 is the old DestS this is an append and S1 is already in place }
        if PrevP<>Pointer(S1) then
          Move(Pointer(S1)^,OutP^,LeftLen*sizeof(UnicodeChar));
      end;

    PUnicodeChar(OutP)[TotalLen]:=#0;
    PUnicodeRec(OutP-UnicodeFirstOff)^.Len:=TotalLen;
    Pointer(DestS):=OutP;
  end;
  {$endif FPC_HAS_UNICODESTR_CONCAT}
  {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  { Stores the concatenation of all strings in sarr in DestS.
    Leading empty strings are skipped.  When everything is empty DestS becomes
    ''; when only one string is non-empty it is assigned directly.  Otherwise
    the memory block of DestS is reallocated in place when DestS is assigned
    and has a reference count of exactly 1, else a new string is allocated and
    the old DestS is released.  DestS may itself occur among the pieces. }
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  var
    first,idx,chunk,TotalLen : SizeInt;
    piece,writep,PrevP,OutP,BlockP : pointer;
  begin
    { skip empty strings }
    first:=low(sarr);
    while (first<=high(sarr)) and (Length(sarr[first])=0) do
      inc(first);
    if first>high(sarr) then
      begin
        DestS:=''; { All source strings empty }
        exit;
      end;

    { Calculate the size of the result up front so a single allocation is enough }
    TotalLen:=0;
    for idx:=first to high(sarr) do
      inc(TotalLen,length(sarr[idx]));

    { In the case of the only nonempty string, return it directly. }
    if PUnicodeRec(Pointer(sarr[first])-UnicodeFirstOff)^.Len=TotalLen then
      begin
        DestS:=sarr[first];
        exit;
      end;

    PrevP:=Pointer(DestS);
    if (PrevP=nil) or (PUnicodeRec(PrevP-UnicodeFirstOff)^.Ref<>1) then
      begin
        { Create new string.  PrevP=nil marks this case further down and also
          keeps the "piece=PrevP" test in the shared loop from ever matching. }
        PrevP:=nil;
        OutP:=NewUnicodeString(TotalLen);
      end
    else
      begin
        { Reallocate when possible; in the hope this will reuse the chunk more
          often than do a redundant copy. }
        BlockP:=PrevP-UnicodeFirstOff;
        OutP:=ReallocMem(BlockP,UnicodeFirstOff+sizeof(UnicodeChar)+TotalLen*sizeof(UnicodeChar))+UnicodeFirstOff;
        { First string can be skipped if appending. }
        if Pointer(sarr[first])=PrevP then
          inc(first);
      end;

    { Copy strings from the last to the first, so that possible occurrences of
      DestS can still read from the beginning of the reallocated DestS. }
    writep:=OutP+TotalLen*sizeof(UnicodeChar);
    for idx:=high(sarr) downto first do
      begin
        piece:=Pointer(sarr[idx]);
        if Assigned(piece) then
          begin
            { DestS occurred among the pieces in the ReallocMem case: use the
              new pointer.  Its header still contains the old DestS length. }
            if piece=PrevP then
              piece:=OutP;
            chunk:=PUnicodeRec(piece-UnicodeFirstOff)^.Len*sizeof(UnicodeChar);
            dec(writep,chunk);
            Move(piece^,writep^,chunk);
          end;
      end;

    { only the new-string case still owns the old DestS }
    if PrevP=nil then
      fpc_UnicodeStr_Decr_Ref(Pointer(DestS));
    PUnicodeChar(OutP)[TotalLen]:=#0;
    { Careful, the loop above relies on the old Len in the OutP header. }
    PUnicodeRec(OutP-UnicodeFirstOff)^.Len:=TotalLen;
    Pointer(DestS):=OutP;
  end;
  {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  {$define FPC_HAS_CHAR_TO_UCHAR}
  { Converts the AnsiChar c to a UnicodeChar.
    The character is converted with the string manager's Ansi2UnicodeMoveProc,
    using DefaultSystemCodePage, and the first character of the result is
    returned.  When the manager produces an empty string, '?' is returned
    instead of indexing into it, in line with fpc_UChar_To_Char. }
  Function fpc_Char_To_UChar(const c : AnsiChar): UnicodeChar; compilerproc;
  var
    w: unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
    if length(w)>0 then
      fpc_Char_To_UChar:=w[1]
    else
      { no character produced: w[1] would dereference a nil string }
      fpc_Char_To_UChar:='?';
  end;
  {$endif FPC_HAS_CHAR_TO_UCHAR}
  {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  {$define FPC_HAS_CHAR_TO_UNICODESTR}
  { Converts the AnsiChar c to a UnicodeString through the string manager's
    Ansi2UnicodeMoveProc, using DefaultSystemCodePage. }
  function fpc_Char_To_UnicodeStr(const c : AnsiChar): UnicodeString; compilerproc;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,Result,1);
  end;
  {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  {$define FPC_HAS_UCHAR_TO_CHAR}
  { Converts the UnicodeChar c to an AnsiChar.
    The character is converted with the string manager's Unicode2AnsiMoveProc,
    using DefaultSystemCodePage.  Unless the conversion yields exactly one
    AnsiChar, '?' is returned. }
  function fpc_UChar_To_Char(const c : UnicodeChar): AnsiChar; compilerproc;
  var
    converted: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    if length(converted)<>1 then
      Result:='?'
    else
      Result:=converted[1];
  end;
  {$endif FPC_HAS_UCHAR_TO_CHAR}
  {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  { Converts the WideChar c to a ShortString: the character is converted with
    the string manager's Wide2AnsiMoveProc, using DefaultSystemCodePage, and
    the converted text is returned. }
  function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  var
    converted: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,converted,DefaultSystemCodePage,1);
    fpc_UChar_To_ShortStr:=converted;
  end;
  {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  { Returns a UnicodeString of length 1 whose only character is c. }
  function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  {$define FPC_HAS_UCHAR_TO_ANSISTR}
  { Converts the UnicodeChar c to an AnsiString.
    The target code page is cp (DefaultSystemCodePage when FPC_HAS_CPSTRING is
    not defined), passed through TranslatePlaceholderCP; the conversion is done
    by the string manager's Unicode2AnsiMoveProc. }
  function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  { Converts the zero-terminated string p to a UnicodeString.
    A nil p or one that starts with #0 gives ''.  Otherwise the characters up
    to the first #0 are converted with the string manager's
    Ansi2UnicodeMoveProc, using DefaultSystemCodePage. }
  function fpc_PChar_To_UnicodeStr(const p : PAnsiChar): UnicodeString; compilerproc;
  var
    count : SizeInt;
  begin
    if Assigned(p) and (p[0]<>#0) then
      begin
        count:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(p,DefaultSystemCodePage,Result,count);
      end
    else
      Result:='';
  end;
  {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  { Converts the character array arr to a UnicodeString.
    With zerobased the array is treated as a zero-terminated string: conversion
    stops at the first #0, or covers the whole array when there is none.
    Without zerobased the whole array is always converted.  The conversion is
    done by the string manager's Ansi2UnicodeMoveProc, using
    DefaultSystemCodePage.
    An empty open array with zerobased gives '' without reading arr[0]. }
  Function fpc_CharArray_To_UnicodeStr(const arr: array of ansichar; zerobased: boolean = true): UnicodeString; compilerproc;
  var
    i : SizeInt;
  begin
    if zerobased then
      begin
        { high(arr)<0 means the open array has no elements, so arr[0] must not
          be read; the test is short-circuited like the other "and"/"or" tests
          in this file }
        if (high(arr)<0) or (arr[0]=#0) then
          begin
            fpc_chararray_to_unicodestr:='';
            exit;
          end;
        i:=IndexChar(arr,high(arr)+1,#0);
        { no terminator found: use the whole array }
        if i=-1 then
          i:=high(arr)+1;
      end
    else
      i:=high(arr)+1;
    widestringmanager.Ansi2UnicodeMoveProc(pansichar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
  end;
  {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  { Converts the widechar array arr to a UnicodeString by copying characters.
    With zerobased the copy stops at the first zero word, or covers the whole
    array when there is none; without zerobased the whole array is copied. }
  function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  var
    count : SizeInt;
  begin
    { default: every element of the array }
    count:=high(arr)+1;
    if zerobased then
      begin
        count:=IndexWord(arr,high(arr)+1,0);
        { no terminator found: fall back to the whole array }
        if count=-1 then
          count:=high(arr)+1;
      end;
    SetLength(Result,count);
    Move(arr[0], Pointer(Result)^,count*sizeof(WideChar));
  end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  623. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  624. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  625. { due to their names, the following procedures should be in wstrings.inc;
  626. however, the compiler generates code using these functions on all platforms }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of WideChar to a ShortString (target code page:
    DefaultSystemCodePage).
    At most high(res) source elements are considered; with zerobased=true
    the source additionally ends at the first zero word.
  }
  var
    Limit : longint;
    Terminator : ptrint;
    Count : byte;
    Converted : ansistring;
  begin
    { number of source elements, clamped to 0..high(res) }
    Limit:=high(arr)+1;
    if Limit>high(res) then
      Limit:=high(res)
    else if Limit<0 then
      Limit:=0;
    Count:=Limit;
    if zerobased then
      begin
        Terminator:=IndexWord(arr[0],Limit,0);
        if Terminator>=0 then
          Count:=Terminator;
      end;
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),Converted,DefaultSystemCodePage,Count);
    res:=Converted;
  end;
  652. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  653. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  654. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an array of WideChar to an AnsiString.
    The target code page is cp when FPC_HAS_CPSTRING is defined, otherwise
    DefaultSystemCodePage. With zerobased=true the source ends at the first
    zero word. An empty source yields ''.
  }
  var
    Count, Terminator : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Count:=high(arr)+1;
    if zerobased then
      begin
        Terminator:=IndexWord(arr,Count,0);
        if Terminator<>-1 then
          Count:=Terminator;
      end;
    if Count<=0 then
      begin
        Result:='';
        exit;
      end;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),RawByteString(Result),cp,Count);
  end;
  681. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  682. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  683. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies an array of WideChar into a WideString.
    zerobased=true : copying stops at the first zero word, if any.
    zerobased=false: all elements are copied.
  }
  var
    Count, Terminator : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        Terminator:=IndexWord(arr,Count,0);
        if Terminator<>-1 then
          Count:=Terminator;
      end;
    SetLength(Result,Count);
    Move(arr[0],Pointer(Result)^,Count*sizeof(WideChar));
  end;
  699. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  700. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  701. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  procedure fpc_unicodestr_to_chararray(out res: array of AnsiChar; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores the result in res.
    Converted data that does not fit is dropped; the unused remainder of
    res is filled with zero bytes.
  }
  var
    Capacity, Count : SizeInt;
    Converted : ansistring;
  begin
    Capacity:=length(res);
    Count:=length(src);
    { src[1] may only be addressed when src is not empty }
    if Count>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),Converted,DefaultSystemCodePage,Count);
    Count:=length(Converted);
    if Count>Capacity then
      Count:=Capacity;
  {$push}
  {$r-}
    { range checks off: Converted may be empty and Count may equal length(res) }
    move(Converted[1],res[0],Count);
    fillchar(res[Count],Capacity-Count,0);
  {$pop}
  end;
  720. {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  721. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  722. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src from its own code page (StringCodePage, passed through
    TranslatePlaceholderCP) to UTF-16 and stores the result in res.
    Characters that do not fit are dropped; the unused remainder of res is
    zero-filled.
  }
  var
    Capacity, Count : SizeInt;
    Converted : widestring;
  begin
    Capacity:=length(res);
    Count:=length(src);
    { src[1] may only be addressed when src is not empty }
    if Count>0 then
      widestringmanager.ansi2widemoveproc(pansichar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),Converted,Count);
    Count:=length(Converted);
    if Count>Capacity then
      Count:=Capacity;
  {$push}
  {$r-}
    { range checks off: Converted may be empty and Count may equal length(res) }
    move(Converted[1],res[0],Count*sizeof(widechar));
    fillchar(res[Count],(Capacity-Count)*SizeOf(WideChar),0);
  {$pop}
  end;
  741. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  742. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  743. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src (taken to be in DefaultSystemCodePage) to UTF-16 and stores
    the result in res. Characters that do not fit are dropped; the unused
    remainder of res is zero-filled.
  }
  var
    Count : longint;
    Converted : widestring;
  begin
    Count:=length(src);
    { character 1 may only be addressed when the length is not 0 }
    if Count>0 then
      widestringmanager.ansi2widemoveproc(pansichar(@src[1]),DefaultSystemCodePage,Converted,Count);
    Count:=length(Converted);
    if Count>length(res) then
      Count:=length(res);
  {$push}
  {$r-}
    { range checks off: Converted may be empty and Count may equal length(res) }
    move(Converted[1],res[0],Count*sizeof(widechar));
    fillchar(res[Count],(length(res)-Count)*SizeOf(WideChar),0);
  {$pop}
  end;
  762. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  763. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  764. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies src into res without any conversion. Characters that do not fit
    are dropped; the unused remainder of res is zero-filled.
  }
  var
    Capacity, Count : SizeInt;
  begin
    Capacity:=length(res);
    Count:=length(src);
    if Count>Capacity then
      Count:=Capacity;
  {$push}
  {$r-}
    { src[1] may only be addressed when there is something to copy }
    if Count>0 then
      move(src[1],res[0],Count*SizeOf(WideChar));
    { range checks off: Count may equal length(res) }
    fillchar(res[Count],(Capacity-Count)*SizeOf(WideChar),0);
  {$pop}
  end;
  780. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  781. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  782. {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings:
      <0 if S1<S2
       0 if S1=S2
      >0 if S1>S2
    The common prefix is compared word by word with CompareWord; if it is
    equal, the difference in length decides.
  }
  var
    Len1, Len2, Common : SizeInt;
  begin
    { same string data (or both nil): equal without looking at the contents }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    Common:=Len1;
    if Len2<Common then
      Common:=Len2;
    Result:=CompareWord(S1[1],S2[1],Common);
    if Result=0 then
      Result:=Len1-Len2;
  end;
  808. {$endif FPC_HAS_UNICODESTR_COMPARE}
  809. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  810. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
       0 if S1=S2
     <>0 if S1<>S2
    Strings of different length are reported as different (-1) without
    comparing their contents.
  }
  var
    Len : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      Result:=0
    else
      begin
        Len:=Length(S1);
        if Len=Length(S2) then
          Result:=CompareWord(S1[1],S2[1],Len)
        else
          Result:=-1;
      end;
  end;
  829. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  830. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  831. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Range check for 1-based indexing: p is the string data pointer.
    Raises error 201 unless p<>nil and 1<=index<=length.
  }
  begin
    { p=nil must be tested first, the length field is only readable otherwise }
    if (p=nil) or (index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  Procedure fpc_UnicodeStr_ZeroBased_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_ZEROBASED_RANGECHECK']; compilerproc;
  {
    Range check for 0-based indexing: p is the string data pointer.
    Raises error 201 unless p<>nil and 0<=index<length.
  }
  begin
    { p=nil must be tested first, the length field is only readable otherwise }
    if (p=nil) or (index<0) or (index>=PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  842. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  843. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  844. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l characters.
    l<=0     : the reference held by S is released.
    S=nil    : a new string of l characters is allocated.
    Ref=1    : the existing block is kept, and only reallocated when it is
               too small or more than twice as large as needed.
    otherwise: a new string is allocated, the old contents are copied and
               the old reference is released.
  }
  var
    Block : Pointer;
    CurSize, NeedSize, CopyChars : SizeUInt;
  begin
    if l<=0 then
      begin
        { length=0, deallocate the string }
        fpc_unicodestr_decr_ref(Pointer(S));
        exit;
      end;

    if Pointer(S)=nil then
      { need a completely new string }
      Pointer(S):=NewUnicodeString(l)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        Block:=Pointer(S)-UnicodeFirstOff;
        CurSize:=MemSize(Block);
        { header + l characters + terminating null }
        NeedSize:=SizeUInt(l*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
        if (NeedSize>CurSize) or ((CurSize>32) and (NeedSize<=SizeInt(SizeUint(CurSize) div 2))) then
          Pointer(S):=reallocmem(Block,NeedSize)+UnicodeFirstOff;
      end
    else
      begin
        { the data is not exclusively ours: build a private copy }
        Block:=NewUnicodeString(l);
        { also move terminating null }
        CopyChars:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len+1;
        if l<CopyChars then
          CopyChars:=l;
        Move(Pointer(S)^,Block^,CopyChars*Sizeof(UnicodeChar));
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Block;
      end;

    { force null termination in case the string got shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  end;
  889. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  890. {*****************************************************************************
  891. Public functions, In interface.
  892. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  { Returns the zero-terminated UTF-16 string at S as a UnicodeString. }
  var
    Len : SizeInt;
  begin
    { the typecast produces a string whose length is the character count }
    Len:=Length(UnicodeString(S));
    Result:=UnicodeCharLenToString(S,Len);
  end;
  897. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  898. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  { Same operation as StringToWideChar, to which the call is forwarded unchanged. }
  begin
    StringToUnicodeChar:=StringToWideChar(Src,Dest,DestSize);
  end;
  903. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  function WideCharToString(S : PWideChar) : UnicodeString;
  { Returns the zero-terminated UTF-16 string at S as a UnicodeString. }
  var
    Len : SizeInt;
  begin
    { the typecast produces a string whose length is the character count }
    Len:=Length(WideString(S));
    Result:=WideCharLenToString(S,Len);
  end;
  908. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  909. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (interpreted in its own code page, StringCodePage(Src)) to
    UTF-16 and stores the result, always zero-terminated, in the buffer Dest.

    Dest     : destination buffer
    DestSize : capacity of Dest in WideChars, including the terminator
    Result   : Dest

    At most DestSize-1 characters are stored. If Dest is nil or DestSize is
    not positive there is no room even for the terminator, so nothing is
    written at all (previously this wrote a #0 in front of the buffer).
  }
  var
    Converted : widestring;
    Count : SizeInt;
  begin
    Result:=Dest;
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PAnsiChar(Src),StringCodePage(Src),Converted,Length(Src));
    Count:=Length(Converted);
    { keep one slot free for the terminating null }
    if DestSize<=Count then
      Count:=DestSize-1;
    move(Converted[1],Dest^,Count*SizeOf(WideChar));
    Dest[Count]:=#0;
  end;
  923. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  924. {$ifndef FPC_HAS_UNICODEFROMLOCALECHARS}
  925. {$define FPC_HAS_UNICODEFROMLOCALECHARS}
  function UnicodeFromLocaleChars(CodePage, Flags: Cardinal; LocaleStr: PAnsiChar;
    LocaleStrLen: SizeInt; UnicodeStr: PWideChar; UnicodeStrLen: SizeInt): SizeInt; overload;
  {
    Converts LocaleStrLen bytes at LocaleStr from code page CodePage to
    UTF-16. Flags is not used by this implementation.

    UnicodeStrLen>0 : the result, cut to UnicodeStrLen-1 characters if
                      necessary, is stored zero-terminated in UnicodeStr and
                      the number of stored characters is returned.
    UnicodeStrLen<=0: nothing is stored; the length of the full conversion
                      is returned.
  }
  var
    Converted : widestring;
  begin
    widestringmanager.Ansi2WideMoveProc(LocaleStr,CodePage,Converted,LocaleStrLen);
    Result:=Length(Converted);
    { only move when we have room }
    if UnicodeStrLen<=0 then
      exit;
    if Result>=UnicodeStrLen then
      Result:=UnicodeStrLen-1;
    move(Converted[1],UnicodeStr^,Result*SizeOf(WideChar));
    UnicodeStr[Result]:=#0;
  end;
  945. {$endif ndef FPC_HAS_UNICODEFROMLOCALECHARS}
  function UnicodeFromLocaleChars(const LocaleName: AnsiString; Flags: Cardinal;
    LocaleStr: PAnsiChar; LocaleStrLen: SizeInt; UnicodeStr: PWideChar;
    UnicodeStrLen: SizeInt): SizeInt; overload;
  {
    Variant taking a locale name: the name is mapped to a code page with
    LocaleNameToCodePage and the work is delegated to the code page variant.
    Returns 0 when the locale name cannot be mapped.
  }
  var
    LocaleCP : TSystemCodePage;
  begin
    if LocaleNameToCodePage(LocaleName,LocaleCP) then
      Result:=UnicodeFromLocaleChars(LocaleCP,Flags,LocaleStr,LocaleStrLen,UnicodeStr,UnicodeStrLen)
    else
      Result:=0;
  end;
  957. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  958. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len code units found at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*SizeOf(UnicodeChar));
  end;
  964. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Procedure form of UnicodeCharLenToString: the first Len code units at
    Src are returned in Dest. }
  begin
    Dest:=UnicodeCharLenToString(Src,Len);
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { AnsiString form: the first Len code units at Src are taken as a
    UnicodeString and converted to AnsiString into Dest. }
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeCharLenToString(Src,Len);
    Dest:=AnsiString(Wide);
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Converts the zero-terminated UTF-16 string at S to an AnsiString,
    returned in Dest. }
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeCharToString(S);
    Dest:=AnsiString(Wide);
  end;
  977. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  978. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len WideChars found at S. }
  begin
    SetLength(WideCharLenToString,Len);
    Move(S^,Pointer(WideCharLenToString)^,Len*SizeOf(WideChar));
  end;
  984. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Procedure form of WideCharLenToString: the first Len WideChars at Src
    are returned in Dest. }
  begin
    Dest:=WideCharLenToString(Src,Len);
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { AnsiString form: the first Len WideChars at Src are taken as a
    UnicodeString and converted to AnsiString into Dest. }
  var
    Wide : UnicodeString;
  begin
    Wide:=WideCharLenToString(Src,Len);
    Dest:=AnsiString(Wide);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Procedure form of WideCharToString: the zero-terminated string at S is
    returned in Dest. }
  begin
    Dest:=WideCharToString(S);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Converts the zero-terminated UTF-16 string at S to an AnsiString,
    returned in Dest. }
  var
    Wide : UnicodeString;
  begin
    Wide:=WideCharToString(S);
    Dest:=AnsiString(Wide);
  end;
  { Typed import of the routine exported under the name FPC_UNICODESTR_UNIQUE,
    so that it can be called with a UnicodeString variable. }
  Function fpc_unicodestr_Unique_func(Var S : UnicodeString): Pointer; external name 'FPC_UNICODESTR_UNIQUE';

  Procedure UniqueString (Var S : UnicodeString);{$ifdef SYSTEMINLINE}inline;{$endif}
  { Public entry point: forwards S to FPC_UNICODESTR_UNIQUE and discards the
    pointer that routine returns. }
  begin
    fpc_unicodestr_Unique_func(S);
  end;
  1006. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  1007. {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Makes sure the reference count of S is 1.
    A nil string or one whose count already is 1 is returned untouched;
    otherwise S is replaced by a freshly allocated copy and the reference to
    the old data is released. The (possibly new) data pointer is returned.
  }
  var
    Duplicate : Pointer;
    Len : SizeInt;
  begin
    Result:=S;
    if Result=nil then
      exit;
    if PUnicodeRec(Result-UnicodeFirstOff)^.Ref=1 then
      exit;
    Len:=PUnicodeRec(S-UnicodeFirstOff)^.len;
    Duplicate:=NewUnicodeString(Len);
    { Len+1: the terminating null is copied as well }
    Move(PUnicodeChar(S)^,Duplicate^,(Len+1)*sizeof(UnicodeChar));
    PUnicodeRec(Duplicate-UnicodeFirstOff)^.len:=Len;
    fpc_unicodestr_decr_ref(S); { Thread safe }
    S:=Duplicate;
    Result:=Duplicate;
  end;
  1029. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  1030. {$ifndef FPC_HAS_UNICODESTR_COPY}
  1031. {$define FPC_HAS_UNICODESTR_COPY}
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns up to Size characters of S starting at the 1-based position
    Index. An Index below 1 is treated as 1; Size is cut down to what is
    available from that position. If nothing remains the result is ''.
  }
  var
    Available : SizeInt;
    Fresh : Pointer;
  begin
    Fresh:=nil;
    { switch to a 0-based, non-negative start offset }
    dec(Index);
    if Index<0 then
      Index:=0;
    { cannot overflow as both Length(S) and Index are non-negative }
    Available:=Length(S)-Index;
    if Size>Available then
      Size:=Available;
    if Size>0 then
      begin
        Fresh:=NewUnicodeString(Size);
        Move(PUnicodeChar(S)[Index],Fresh^,Size*sizeof(UnicodeChar));
        PUnicodeRec(Fresh-UnicodeFirstOff)^.Len:=Size;
        PUnicodeChar(Fresh+Size*sizeof(UnicodeChar))^:=#0;
      end;
    { release whatever the result variable held before installing the new data }
    fpc_unicodestr_decr_ref(Pointer(Result));
    Pointer(Result):=Fresh;
  end;
  1054. {$endif FPC_HAS_UNICODESTR_COPY}
  1055. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1056. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of Substr in Source
    at or after position Offset, or 0 when there is none. 0 is also returned
    for an empty Substr and for an Offset outside 1..Length(Source).
  }
  var
    Cur, LastStart, SourceLen, SubLen, Skip : SizeInt;
  begin
    Result:=0;
    SourceLen:=Length(Source);
    SubLen:=Length(Substr);
    if (SubLen<=0) or (Offset<=0) or (Offset>SourceLen) then
      exit;
    { last position at which Substr still fits into Source }
    LastStart:=SourceLen-SubLen+1;
    Cur:=Offset;
    while Cur<=LastStart do
      begin
        { jump to the next candidate: an occurrence of the first character }
        Skip:=IndexWord(Source[Cur],LastStart-Cur+1,word(Substr[1]));
        if Skip<0 then
          exit;
        inc(Cur,Skip);
        if CompareWord(Substr[1],Source[Cur],SubLen)=0 then
          exit(Cur);
        inc(Cur);
      end;
  end;
  1079. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1080. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1081. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1082. { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c in s at or
    after position Offset, or 0 when there is none or when Offset lies
    outside 1..Length(s).
  }
  var
    Len, Found : SizeInt;
  begin
    Result:=0;
    Len:=Length(s);
    if (Offset<=0) or (Offset>Len) then
      exit;
    Found:=IndexWord(s[Offset],Len-Offset+1,word(c));
    if Found>=0 then
      Result:=Offset+Found;
  end;
  1096. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1097. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1098. block, which is significant bloat without any sensible speed improvement. }
  Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts the substring c to UnicodeString and searches it in s with the
    UnicodeString/UnicodeString version of Pos. }
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;
  Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts the substring c to UnicodeString and searches it in s with the
    UnicodeString/UnicodeString version of Pos. }
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;
  Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  { Converts the searched string s to UnicodeString and looks for c in it
    with the UnicodeString/UnicodeString version of Pos. }
  begin
    Pos:=Pos(c,UnicodeString(s),Offset);
  end;
  1111. {$ifndef FPC_HAS_UNICODESTR_OF_CHAR}
  1112. {$define FPC_HAS_UNICODESTR_OF_CHAR}
  Function StringOfChar(c : Unicodechar;l : SizeInt) : UnicodeString;
  { Returns a UnicodeString consisting of l copies of c. }
  begin
    SetLength(Result,l);
    { fill by the actual length, which is what SetLength made of l }
    FillWord(Pointer(Result)^,Length(Result),word(c));
  end;
  1118. {$endif}
  1119. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1120. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1121. { Faster version for a AnsiChar alone. Must be implemented because }
  1122. { pos(c: AnsiChar; const s: shortstring) also exists, so otherwise }
  1123. { using pos(AnsiChar,pansichar) will always call the shortstring version }
  1124. { (exact match for first argument), also with $h+ (JM) }
  Function Pos (c : AnsiChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c (widened to a
    UnicodeChar) in s at or after position Offset, or 0 when there is none
    or when Offset lies outside 1..Length(s).
  }
  var
    Len, Found : SizeInt;
  begin
    Result:=0;
    Len:=Length(s);
    if (Offset<=0) or (Offset>Len) then
      exit;
    Found:=IndexWord(s[Offset],Len-Offset+1,word(unicodechar(c)));
    if Found>=0 then
      Result:=Offset+Found;
  end;
  1138. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1139. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1140. {$define FPC_HAS_DELETE_UNICODESTR}
  Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes Size characters from S starting at the 1-based position Index.
    Nothing happens when Index lies outside 1..Length(S) or Size<=0.
    A Size reaching beyond the end of S removes everything from Index on.
  }
  var
    OldLen, Tail : SizeInt;
  begin
    OldLen:=Length(S);
    if (Index>OldLen) or (Index<=0) or (Size<=0) then
      exit;
    UniqueString(S);
    { number of characters following position Index;
      comparing against it avoids Size+Index, which overflows for Size=MaxInt }
    Tail:=OldLen-Index;
    if Size>Tail then
      { cut off the rest of the string, nothing has to be moved }
      Size:=Tail+1
    else
      begin
        Dec(Index);
        { close the gap; the count includes the terminating null }
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(OldLen-Index-Size+1)*sizeof(UnicodeChar));
      end;
    SetLength(S,OldLen-Size);
  end;
  1159. {$endif FPC_HAS_DELETE_UNICODESTR}
  1160. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1161. {$define FPC_HAS_INSERT_UNICODESTR}
  Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the 1-based position Index.
    Index is clamped to 1..Length(S)+1. An empty Source changes nothing; an
    empty S simply becomes Source. Inserting a string into itself is
    supported.
  }
  var
    DestLen, InsLen : SizeInt;
    SameString : boolean;
    From : PUnicodeChar;
  begin
    if Source='' then
      exit;
    if S='' then
      begin
        S:=Source;
        exit;
      end;
    InsLen:=PUnicodeRec(Pointer(Source)-UnicodeFirstOff)^.Len;
    DestLen:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len;
    { 0-based insert offset, clamped to 0..DestLen }
    Dec(Index);
    if Index<0 then
      Index:=0;
    if Index>DestLen then
      Index:=DestLen;
    { must be determined before SetLength, which may give S a new data pointer }
    SameString:=Pointer(Source)=Pointer(S);
    SetLength(S,InsLen+DestLen);
    { open a gap of InsLen characters at Index }
    Move(PUnicodeChar(Pointer(S))[Index],PUnicodeChar(Pointer(S))[Index+InsLen],(DestLen-Index)*sizeof(UnicodeChar));
    { on self-insert read from S itself: its first DestLen characters are
      untouched by the move above }
    if SameString then
      From:=Pointer(S)
    else
      From:=Pointer(Source);
    Move(From^,PUnicodeChar(Pointer(S))[Index],InsLen*SizeOf(UnicodeChar));
  end;
  1190. {$endif FPC_HAS_INSERT_UNICODESTR}
  1191. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1192. {$define FPC_HAS_UPCASE_UNICODECHAR}
  Function UpCase(c:UnicodeChar):UnicodeChar;
  { Passes c as a one-character string to the string manager's
    UpperUnicodeStringProc and returns the first character of its result. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(UnicodeString(c))[1];
  end;
  1197. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1198. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1199. {$define FPC_HAS_UPCASE_UNICODESTR}
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns what the string manager's UpperUnicodeStringProc produces for s. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  1204. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1205. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1206. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  { Passes c as a one-character string to the string manager's
    LowerUnicodeStringProc and returns the first character of its result. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(UnicodeString(c))[1];
  end;
  1211. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1212. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1213. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  function LowerCase(const s : UnicodeString) : UnicodeString;
  { Returns what the string manager's LowerUnicodeStringProc produces for s. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(s);
  end;
  1218. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1219. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1220. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  {
    Gives S the length Len and, when Buf is not nil and Len is positive,
    fills it with the Len code units found at Buf. With Buf=nil the
    contents are left as SetLength produced them.
  }
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>nil) then
      Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  1227. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1228. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1229. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PAnsiChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  {
    Sets S to the conversion of the Len bytes at Buf (source code page:
    DefaultSystemCodePage). When Buf is nil or Len is not positive, S is
    merely given the length Len.
  }
  begin
    if (Buf=nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1237. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1238. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  {
    Val for floating point values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1252. {$endif}
  1253. {$ifndef FPC_STR_ENUM_INTERN}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  {
    Val for enumeration values on a UnicodeString: s is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with code=256.

    The result is now initialised to 0, like in the other Val helpers of
    this file; it used to be undefined on the "string too long" path.

    NOTE(review): str2ordindex is not used here, unlike ord2strindex in
    fpc_unicodestr_enum, which is passed on to the ShortString helper.
    Confirm whether this should forward to the ShortString enum Val helper.
  }
  var
    Narrow : ShortString;
  begin
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        Narrow:=ShortString(s);
        val(Narrow,fpc_val_enum_unicodestr,code);
      end;
  end;
  1266. {$endif FPC_STR_ENUM_INTERN}
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  {
    Val for Currency values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      begin
        Result:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr ({$ifndef VER3_2}DestSize: SizeInt;{$endif VER3_2} Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  {
    Val for unsigned integers on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0.
    DestSize, where present, is not used by this routine.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  {
    Val for signed integers on a UnicodeString: S is converted to a
    ShortString and parsed by int_Val_SInt_ShortStr, which also receives
    DestSize. Strings longer than 255 characters are rejected with Code=256
    and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      Code:=256;
  end;
  1308. {$ifndef CPU64}
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  {
    Val for QWord values on a UnicodeString: S is converted to a ShortString
    and handed to Val. Strings longer than 255 characters are rejected with
    Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  {
    Val for Int64 values on a UnicodeString: S is converted to a ShortString
    and handed to Val. Strings longer than 255 characters are rejected with
    Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1335. {$endif CPU64}
  1336. {$if defined(CPU16) or defined(CPU8)}
  Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  {
    Val for LongWord values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  {
    Val for LongInt values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  {
    Val for Word values on a UnicodeString: S is converted to a ShortString
    and handed to Val. Strings longer than 255 characters are rejected with
    Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  {
    Val for SmallInt values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if Length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1389. {$endif CPU16 or CPU8}
  1390. {$ifndef FPUNONE}
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  { Str for floating point values: d is formatted by str_real (with len, fr
    and the real type rt) into a ShortString, which is then converted to
    UnicodeString. }
  var
    Formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Formatted);
    s:=UnicodeString(Formatted);
  end;
  1398. {$endif}
  1399. {$ifndef FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  { Str for enumeration values: all arguments are passed on to
    fpc_shortstr_enum and its ShortString result is converted to
    UnicodeString. }
  var
    Formatted : ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
    s:=UnicodeString(Formatted);
  end;
  1407. {$endif FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  { Str for Boolean values: b and len are passed on to fpc_shortstr_bool and
    its ShortString result is converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    fpc_shortstr_bool(b,len,Formatted);
    s:=UnicodeString(Formatted);
  end;
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  { Str for Currency values: c is formatted as c:len:fr into a ShortString,
    which is then converted to UnicodeString. }
  var
    Formatted : shortstring;
  begin
    str(c:len:fr,Formatted);
    s:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for signed integers: v is formatted as v:Len into a ShortString,
    which is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for unsigned integers: v is formatted as v:Len into a ShortString,
    which is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  1436. {$ifndef CPU64}
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Int64 values: v is formatted as v:Len into a ShortString, which
    is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for QWord values: v is formatted as v:Len into a ShortString, which
    is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  1451. {$endif CPU64}
  1452. {$if defined(CPU16) or defined(CPU8)}
  Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongInt values: v is formatted as v:Len into a ShortString,
    which is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongWord values: v is formatted as v:Len into a ShortString,
    which is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for SmallInt values: v is formatted as v:Len into a ShortString,
    which is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Word values: v is formatted as v:Len into a ShortString, which
    is then converted to UnicodeString. }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  1481. {$endif CPU16 or CPU8}
  1482. function UnicodeToUtf8(Dest: PAnsiChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1483. begin
  1484. if assigned(Source) then
  1485. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source))
  1486. else
  1487. Result:=0;
  1488. end;
  1489. function UnicodeToUtf8(Dest: PAnsiChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1490. {$ifdef EXCLUDE_COMPLEX_PROCS}
  1491. begin
  1492. runerror(217);
  1493. end;
  1494. {$else EXCLUDE_COMPLEX_PROCS}
  1495. var
  1496. i,j : SizeUInt;
  1497. lw : longword;
  1498. begin
  1499. result:=0;
  1500. if source=nil then
  1501. exit;
  1502. i:=0;
  1503. j:=0;
  1504. if assigned(Dest) then
  1505. begin
  1506. while (i<SourceChars) and (j<MaxDestBytes) do
  1507. begin
  1508. lw:=ord(Source[i]);
  1509. case lw of
  1510. 0..$7f:
  1511. begin
  1512. Dest[j]:=AnsiChar(lw);
  1513. inc(j);
  1514. end;
  1515. $80..$7ff:
  1516. begin
  1517. if j+1>=MaxDestBytes then
  1518. break;
  1519. Dest[j]:=AnsiChar($c0 or (lw shr 6));
  1520. Dest[j+1]:=AnsiChar($80 or (lw and $3f));
  1521. inc(j,2);
  1522. end;
  1523. $800..$d7ff,$e000..$ffff:
  1524. begin
  1525. if j+2>=MaxDestBytes then
  1526. break;
  1527. Dest[j]:=AnsiChar($e0 or (lw shr 12));
  1528. Dest[j+1]:=AnsiChar($80 or ((lw shr 6) and $3f));
  1529. Dest[j+2]:=AnsiChar($80 or (lw and $3f));
  1530. inc(j,3);
  1531. end;
  1532. $d800..$dbff:
  1533. {High Surrogates}
  1534. begin
  1535. if j+3>=MaxDestBytes then
  1536. break;
  1537. if (i+1<sourcechars) and
  1538. (word(Source[i+1]) >= $dc00) and
  1539. (word(Source[i+1]) <= $dfff) then
  1540. begin
  1541. { $d7c0 is ($d800 - ($10000 shr 10)) }
  1542. lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
  1543. Dest[j]:=AnsiChar($f0 or (lw shr 18));
  1544. Dest[j+1]:=AnsiChar($80 or ((lw shr 12) and $3f));
  1545. Dest[j+2]:=AnsiChar($80 or ((lw shr 6) and $3f));
  1546. Dest[j+3]:=AnsiChar($80 or (lw and $3f));
  1547. inc(j,4);
  1548. inc(i);
  1549. end;
  1550. end;
  1551. end;
  1552. inc(i);
  1553. end;
  1554. if j>SizeUInt(MaxDestBytes-1) then
  1555. j:=MaxDestBytes-1;
  1556. Dest[j]:=#0;
  1557. end
  1558. else
  1559. begin
  1560. while i<SourceChars do
  1561. begin
  1562. case word(Source[i]) of
  1563. $0..$7f:
  1564. inc(j);
  1565. $80..$7ff:
  1566. inc(j,2);
  1567. $800..$d7ff,$e000..$ffff:
  1568. inc(j,3);
  1569. $d800..$dbff:
  1570. begin
  1571. if (i+1<sourcechars) and
  1572. (word(Source[i+1]) >= $dc00) and
  1573. (word(Source[i+1]) <= $dfff) then
  1574. begin
  1575. inc(j,4);
  1576. inc(i);
  1577. end;
  1578. end;
  1579. end;
  1580. inc(i);
  1581. end;
  1582. end;
  1583. result:=j+1;
  1584. end;
  1585. {$endif EXCLUDE_COMPLEX_PROCS}
  1586. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PAnsiChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1587. begin
  1588. if assigned(Source) then
  1589. Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True)
  1590. else
  1591. Result:=0;
  1592. end;
  1593. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1594. begin
  1595. Result:=Utf8ToUnicode(Dest,MaxDestChars,Source,SourceBytes,True);
  1596. end;
  1597. function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
  1598. {$ifdef EXCLUDE_COMPLEX_PROCS}
  1599. begin
  1600. runerror(217);
  1601. end;
  1602. {$else EXCLUDE_COMPLEX_PROCS}
  1603. const
  1604. UNICODE_INVALID=63;
  1605. var
  1606. InputUTF8: SizeUInt;
  1607. IBYTE: BYTE;
  1608. OutputUnicode: SizeUInt;
  1609. PRECHAR: SizeUInt;
  1610. TempBYTE: BYTE;
  1611. CharLen: SizeUint;
  1612. LookAhead: SizeUInt;
  1613. UC: SizeUInt;
  1614. begin
  1615. if not assigned(Source) then
  1616. begin
  1617. result:=0;
  1618. exit;
  1619. end;
  1620. result:=SizeUInt(-1);
  1621. InputUTF8:=0;
  1622. OutputUnicode:=0;
  1623. PreChar:=0;
  1624. if Assigned(Dest) Then
  1625. begin
  1626. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1627. begin
  1628. IBYTE:=byte(Source[InputUTF8]);
  1629. if (IBYTE and $80) = 0 then
  1630. begin
  1631. // One character US-ASCII, convert it to unicode
  1632. // Commented code to convert LF to CRLF has been removed
  1633. Dest[OutputUnicode]:=WideChar(IBYTE);
  1634. inc(OutputUnicode);
  1635. PreChar:=IBYTE;
  1636. inc(InputUTF8);
  1637. end
  1638. else
  1639. begin
  1640. TempByte:=IBYTE;
  1641. CharLen:=0;
  1642. while (TempBYTE and $80)<>0 do
  1643. begin
  1644. TempBYTE:=(TempBYTE shl 1) and $FE;
  1645. inc(CharLen);
  1646. end;
  1647. //Test for the "CharLen" conforms UTF-8 string
  1648. //This means the 10xxxxxx pattern.
  1649. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1650. begin
  1651. //Insuficient chars in string to decode
  1652. //UTF-8 array. Fallback to single AnsiChar.
  1653. CharLen:= 1;
  1654. end;
  1655. for LookAhead := 1 to CharLen-1 do
  1656. begin
  1657. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1658. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1659. begin
  1660. //Invalid UTF-8 sequence, fallback.
  1661. CharLen:= LookAhead;
  1662. break;
  1663. end;
  1664. end;
  1665. UC:=$FFFF;
  1666. case CharLen of
  1667. 1: begin
  1668. //Not valid UTF-8 sequence
  1669. UC:=UNICODE_INVALID;
  1670. end;
  1671. 2: begin
  1672. //Two bytes UTF, convert it
  1673. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1674. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1675. if UC <= $7F then
  1676. begin
  1677. //Invalid UTF sequence.
  1678. UC:=UNICODE_INVALID;
  1679. end;
  1680. end;
  1681. 3: begin
  1682. //Three bytes, convert it to unicode
  1683. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1684. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1685. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1686. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1687. begin
  1688. //Invalid UTF-8 sequence
  1689. UC:= UNICODE_INVALID;
  1690. End;
  1691. end;
  1692. 4: begin
  1693. //Four bytes, convert it to two unicode characters
  1694. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1695. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1696. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1697. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1698. if (UC < $10000) or (UC > $10FFFF) then
  1699. begin
  1700. UC:= UNICODE_INVALID;
  1701. end
  1702. else
  1703. begin
  1704. { only store pair if room }
  1705. dec(UC,$10000);
  1706. if (OutputUnicode<MaxDestChars-1) then
  1707. begin
  1708. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1709. inc(OutputUnicode);
  1710. UC:=(UC and $3ff) + $DC00;
  1711. end
  1712. else
  1713. begin
  1714. InputUTF8:= InputUTF8 + CharLen;
  1715. { don't store anything }
  1716. CharLen:=0;
  1717. end;
  1718. end;
  1719. end;
  1720. 5,6,7: begin
  1721. //Invalid UTF8 to unicode conversion,
  1722. //mask it as invalid UNICODE too.
  1723. UC:=UNICODE_INVALID;
  1724. end;
  1725. end;
  1726. if CharLen > 0 then
  1727. begin
  1728. if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
  1729. HandleError(231); // Will be converted to EConversionError in sysutils
  1730. PreChar:=UC;
  1731. Dest[OutputUnicode]:=WideChar(UC);
  1732. inc(OutputUnicode);
  1733. end;
  1734. InputUTF8:= InputUTF8 + CharLen;
  1735. end;
  1736. end;
  1737. Result:=OutputUnicode+1;
  1738. end
  1739. else
  1740. begin
  1741. while (InputUTF8<SourceBytes) do
  1742. begin
  1743. IBYTE:=byte(Source[InputUTF8]);
  1744. if (IBYTE and $80) = 0 then
  1745. begin
  1746. // One character US-ASCII, convert it to unicode
  1747. // Commented code to convert LF to CRLF has been removed
  1748. inc(OutputUnicode);
  1749. PreChar:=IBYTE;
  1750. inc(InputUTF8);
  1751. end
  1752. else
  1753. begin
  1754. TempByte:=IBYTE;
  1755. CharLen:=0;
  1756. while (TempBYTE and $80)<>0 do
  1757. begin
  1758. TempBYTE:=(TempBYTE shl 1) and $FE;
  1759. inc(CharLen);
  1760. end;
  1761. //Test for the "CharLen" conforms UTF-8 string
  1762. //This means the 10xxxxxx pattern.
  1763. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1764. begin
  1765. //Insuficient chars in string to decode
  1766. //UTF-8 array. Fallback to single AnsiChar.
  1767. CharLen:= 1;
  1768. end;
  1769. for LookAhead := 1 to CharLen-1 do
  1770. begin
  1771. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1772. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1773. begin
  1774. //Invalid UTF-8 sequence, fallback.
  1775. CharLen:= LookAhead;
  1776. break;
  1777. end;
  1778. end;
  1779. UC:=$FFFF;
  1780. case CharLen of
  1781. 1: begin
  1782. //Not valid UTF-8 sequence
  1783. UC:=UNICODE_INVALID;
  1784. end;
  1785. 2: begin
  1786. //Two bytes UTF, convert it
  1787. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1788. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1789. if UC <= $7F then
  1790. begin
  1791. //Invalid UTF sequence.
  1792. UC:=UNICODE_INVALID;
  1793. end;
  1794. end;
  1795. 3: begin
  1796. //Three bytes, convert it to unicode
  1797. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1798. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1799. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1800. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1801. begin
  1802. //Invalid UTF-8 sequence
  1803. UC:= UNICODE_INVALID;
  1804. end;
  1805. end;
  1806. 4: begin
  1807. //Four bytes, convert it to two unicode characters
  1808. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1809. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1810. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1811. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1812. if (UC < $10000) or (UC > $10FFFF) then
  1813. UC:= UNICODE_INVALID
  1814. else
  1815. { extra character character }
  1816. inc(OutputUnicode);
  1817. end;
  1818. 5,6,7: begin
  1819. //Invalid UTF8 to unicode conversion,
  1820. //mask it as invalid UNICODE too.
  1821. UC:=UNICODE_INVALID;
  1822. end;
  1823. end;
  1824. if CharLen > 0 then
  1825. begin
  1826. if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
  1827. HandleError(231); // Will be converted to EConversionError in sysutils
  1828. PreChar:=UC;
  1829. inc(OutputUnicode);
  1830. end;
  1831. InputUTF8:= InputUTF8 + CharLen;
  1832. end;
  1833. end;
  1834. Result:=OutputUnicode+1;
  1835. end;
  1836. end;
  1837. {$endif EXCLUDE_COMPLEX_PROCS}
  1838. function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  1839. begin
  1840. Result:=UTF8Encode(UnicodeString(s));
  1841. end;
  1842. {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1843. {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1844. function UTF8Encode(const s : UnicodeString) : RawByteString;
  1845. var
  1846. i : SizeInt;
  1847. hs : UTF8String;
  1848. begin
  1849. result:='';
  1850. if Length(s)=0 then
  1851. exit;
  1852. SetLength(hs,length(s)*3);
  1853. i:=UnicodeToUtf8(pansichar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  1854. if i>0 then
  1855. begin
  1856. SetLength(hs,i-1);
  1857. result:=hs;
  1858. end;
  1859. end;
  1860. {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1861. {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  1862. {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  1863. function UTF8Decode(const s : RawByteString): UnicodeString;
  1864. var
  1865. i : SizeInt;
  1866. hs : UnicodeString;
  1867. begin
  1868. result:='';
  1869. if Length(s)=0 then
  1870. exit;
  1871. SetLength(hs,length(s));
  1872. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pansichar(s),length(s));
  1873. if i>0 then
  1874. begin
  1875. SetLength(hs,i-1);
  1876. result:=hs;
  1877. end;
  1878. end;
  1879. {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  1880. function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1881. begin
  1882. Result:=Utf8Encode(s);
  1883. end;
  1884. function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1885. begin
  1886. Result:=RawByteString(Utf8Decode(s));
  1887. end;
  1888. {$ifdef FPC_HAS_FEATURE_DYNARRAYS}
  1889. procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
  1890. var
  1891. i, reslen: sizeint;
  1892. w: longint;
  1893. begin
  1894. reslen:=0;
  1895. i:=0;
  1896. { calculate required length }
  1897. while (i<len) do
  1898. begin
  1899. if (p[i]<=#$d7ff) or (p[i]>=#$e000) then
  1900. inc(i)
  1901. else if (p[i]<=#$dbff) and
  1902. (i+1<len) and
  1903. (p[i+1]>=#$dc00) and
  1904. (p[i+1]<=#$dfff) then
  1905. inc(i,2)
  1906. else
  1907. inc(i);
  1908. inc(reslen);
  1909. end;
  1910. SetLength(res,reslen+1); { +1 for null termination }
  1911. reslen:=0;
  1912. i:=0;
  1913. { do conversion }
  1914. while (i<len) do
  1915. begin
  1916. w:=ord(p[i]);
  1917. if (w<=$d7ff) or (w>=$e000) then
  1918. res[reslen]:=w
  1919. else if (w<=$dbff) and
  1920. (i+1<len) and
  1921. (p[i+1]>=#$dc00) and
  1922. (p[i+1]<=#$dfff) then
  1923. begin
  1924. res[reslen]:=(UCS4Char(w-$d7c0) shl 10)+(UCS4Char(p[i+1]) xor $dc00);
  1925. inc(i);
  1926. end
  1927. else { invalid surrogate pair }
  1928. res[reslen]:=w;
  1929. inc(i);
  1930. inc(reslen);
  1931. end;
  1932. res[reslen]:=0;
  1933. end;
  1934. {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1935. {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1936. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  1937. begin
  1938. UCS4Encode(PWideChar(s),Length(s),result);
  1939. end;
  1940. {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1941. {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  1942. {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  1943. function WideStringToUCS4String(const s : WideString) : UCS4String;
  1944. begin
  1945. UCS4Encode(PWideChar(s),Length(s),result);
  1946. end;
  1947. {$endif FPC_HAS_WIDESTR_TO_UCS4STRING}
  1948. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1949. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  1950. { dest should point to previously allocated wide/unicodestring }
  1951. procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
  1952. var
  1953. i: sizeint;
  1954. nc: UCS4Char;
  1955. begin
  1956. for i:=0 to length(s)-2 do { -2 because s contains explicit terminating #0 }
  1957. begin
  1958. nc:=s[i];
  1959. if (nc<=$ffff) then
  1960. dest^:=widechar(nc)
  1961. else if (dword(nc)<=$10ffff) then
  1962. begin
  1963. dest^:=widechar(nc shr 10 + $d7c0);
  1964. { subtracting $10000 doesn't change low 10 bits }
  1965. dest[1]:=widechar(nc and $3ff + $dc00);
  1966. inc(dest);
  1967. end
  1968. else { invalid code point }
  1969. dest^:='?';
  1970. inc(dest);
  1971. end;
  1972. end;
  1973. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  1974. var
  1975. i : SizeInt;
  1976. reslen : SizeInt;
  1977. begin
  1978. reslen:=0;
  1979. for i:=0 to length(s)-2 do { skip terminating #0 }
  1980. Inc(reslen,1+ord((s[i]>$ffff) and (cardinal(s[i])<=$10ffff)));
  1981. SetLength(result,reslen);
  1982. UCS4Decode(s,pointer(result));
  1983. end;
  1984. function UCS4StringToWideString(const s : UCS4String) : WideString;
  1985. var
  1986. i : SizeInt;
  1987. reslen : SizeInt;
  1988. begin
  1989. reslen:=0;
  1990. for i:=0 to length(s)-2 do { skip terminating #0 }
  1991. Inc(reslen,1+ord((s[i]>$ffff) and (cardinal(s[i])<=$10ffff)));
  1992. SetLength(result,reslen);
  1993. UCS4Decode(s,pointer(result));
  1994. end;
  1995. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1996. {$endif FPC_HAS_FEATURE_DYNARRAYS}
  1997. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  1998. const
  1999. SNoUnicodestrings = 'This binary has no string conversion support compiled in.';
  2000. SRecompileWithUnicodestrings = 'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';
  2001. procedure unimplementedunicodestring;
  2002. begin
  2003. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  2004. {$ifndef HAS_WIDESTRINGMANAGER}
  2005. {$ifndef FPC_SYSTEM_NO_VERBOSE_UNICODEERROR}
  2006. If IsConsole then
  2007. begin
  2008. Writeln(StdErr,SNoUnicodestrings);
  2009. Writeln(StdErr,SRecompileWithUnicodestrings);
  2010. end;
  2011. {$endif FPC_SYSTEM_NO_VERBOSE_UNICODEERROR}
  2012. {$endif HAS_WIDESTRINGMANAGER}
  2013. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  2014. HandleErrorAddrFrameInd(234{RuntimeErrorExitCodes[reCodesetConversion]},get_pc_addr,get_frame);
  2015. end;
  2016. function StringElementSize(const S: UnicodeString): Word; overload;
  2017. begin
  2018. if assigned(Pointer(S)) then
  2019. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  2020. else
  2021. Result:=SizeOf(UnicodeChar);
  2022. end;
  2023. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  2024. begin
  2025. if assigned(Pointer(S)) then
  2026. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  2027. else
  2028. Result:=0;
  2029. end;
  2030. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  2031. begin
  2032. {$ifdef FPC_HAS_CPSTRING}
  2033. if assigned(Pointer(S)) then
  2034. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  2035. else
  2036. {$endif FPC_HAS_CPSTRING}
  2037. Result:=DefaultUnicodeCodePage;
  2038. end;
  2039. {$push}
  2040. {$warnings off}
  2041. function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
  2042. begin
  2043. unimplementedunicodestring;
  2044. end;
  2045. function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  2046. begin
  2047. unimplementedunicodestring;
  2048. end;
  2049. function StubWideCase(const s: WideString): WideString;
  2050. begin
  2051. unimplementedunicodestring;
  2052. end;
  2053. function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  2054. begin
  2055. unimplementedunicodestring;
  2056. end;
  2057. {$pop}
  2058. procedure initunicodestringmanager;
  2059. begin
  2060. {$ifndef HAS_WIDESTRINGMANAGER}
  2061. widestringmanager:=Default(TUnicodeStringManager);
  2062. {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2063. widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  2064. {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2065. widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  2066. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2067. widestringmanager.Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  2068. widestringmanager.UpperWideStringProc:=@StubWideCase;
  2069. widestringmanager.LowerWideStringProc:=@StubWideCase;
  2070. widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  2071. widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  2072. widestringmanager.UpperUnicodeStringProc:=@StubUnicodeCase;
  2073. widestringmanager.LowerUnicodeStringProc:=@StubUnicodeCase;
  2074. widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
  2075. {$endif HAS_WIDESTRINGMANAGER}
  2076. widestringmanager.CompareWideStringProc:=@StubCompareWideString;
  2077. // widestringmanager.CompareTextWideStringProc:=@StubCompareWideString;
  2078. widestringmanager.CompareUnicodeStringProc:=@StubCompareUnicodeString;
  2079. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  2080. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  2081. end;
  2082. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  2083. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2084. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2085. Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
  2086. Begin
  2087. widestringmanager.Unicode2AnsiMoveProc(punicodechar(Str),Result,
  2088. DefaultFileSystemCodePage,Length(Str));
  2089. End;
  2090. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2091. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2092. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2093. Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
  2094. Begin
  2095. widestringmanager.Unicode2AnsiMoveProc(@arr[0],Result,
  2096. DefaultFileSystemCodePage,length(pwidechar(@arr[0])));
  2097. End;
  2098. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2099. Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
  2100. Begin
  2101. Result:=Str;
  2102. SetCodePage(Result,DefaultFileSystemCodePage,True);
  2103. End;
  2104. { Delphi compatibility: always interpret the data in the string as UTF-8,
  2105. ignore any codepage }
  2106. function UTF8ToString(const S: RawByteString): UnicodeString; inline;
  2107. begin
  2108. Result := UTF8Decode(S);
  2109. end;
  2110. function UTF8ToUnicodeString(const s : RawByteString): UnicodeString;
  2111. begin
  2112. Result := UTF8Decode(S);
  2113. end;
  2114. function UTF8ToString(const S: ShortString): UnicodeString;
  2115. Var
  2116. rs: RawByteString;
  2117. begin
  2118. rs:=S;
  2119. Result := UTF8Decode(rs);
  2120. end;
  2121. function UTF8ToUnicodeString(const S: ShortString): unicodestring;
  2122. begin
  2123. Result:=UTF8ToString(S);
  2124. end;
  2125. function UTF8ToString(const S: PAnsiChar): UnicodeString;
  2126. var
  2127. rs: RawByteString;
  2128. Count: SizeInt;
  2129. begin
  2130. Count := length(S);
  2131. SetLength(rs, Count);
  2132. if Count > 0 then
  2133. fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
  2134. Result := UTF8ToString(rs);
  2135. end;
  2136. function UTF8ToUnicodeString(const S: PAnsiChar): unicodestring;
  2137. begin
  2138. Result:=UTF8ToString(S);
  2139. end;
  2140. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2141. are as well }
  2142. {$ifndef CPUJVM}
  2143. function UTF8ToString(const S: array of AnsiChar): UnicodeString;
  2144. var
  2145. rs: RawByteString;
  2146. Count: SizeInt;
  2147. begin
  2148. Count := Length(S);
  2149. SetLength(rs, Count);
  2150. if Count > 0 then
  2151. fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
  2152. Result := UTF8ToString(rs);
  2153. end;
  2154. function UTF8ToString(const S: array of Byte): UnicodeString;
  2155. var
  2156. rs: RawByteString;
  2157. Count: SizeInt;
  2158. begin
  2159. Count := Length(S);
  2160. SetLength(rs, Count);
  2161. if Count > 0 then
  2162. fpc_pchar_ansistr_intern_charmove(pansichar(@S),Low(S),rs,0,Count);
  2163. Result := UTF8ToString(rs);
  2164. end;
  2165. {$endif not CPUJVM}
  2166. Function LocaleNameToCodePage(const localename : shortstring; out codepage : TSystemCodePage) : Boolean;
  2167. begin
  2168. Result:=(localename='UTF-8') or (localename='UTF8');
  2169. if Result then
  2170. CodePage:=CP_UTF8
  2171. else
  2172. begin
  2173. Result:=(localename='UTF-7') or (localename='UTF7');
  2174. if Result then
  2175. CodePage:=CP_UTF7
  2176. else
  2177. begin
  2178. Result:=Assigned(LocaleNameToCodePageCallBack);
  2179. If Result then
  2180. LocaleNameToCodePageCallBack(LocaleName,CodePage,Result);
  2181. end;
  2182. end;
  2183. end;