ustrings.inc 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply
  20. with sizeof(UnicodeChar) to convert. This is needed to be compatible with Delphi and
  21. Windows COM BSTR.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  28. Type
  29. PUnicodeRec = ^TUnicodeRec;
  30. TUnicodeRec = Packed Record
  31. CodePage : TSystemCodePage;
  32. ElementSize : Word;
  33. {$ifdef CPU64}
  34. { align fields }
  35. Dummy : DWord;
  36. {$endif CPU64}
  37. Ref : SizeInt;
  38. Len : SizeInt;
  39. First : UnicodeChar;
  40. end;
  41. Const
  42. UnicodeRecLen = SizeOf(TUnicodeRec);
  43. UnicodeFirstOff = SizeOf(TUnicodeRec)-sizeof(UnicodeChar);
  44. {
  45. Default UnicodeChar <-> Char conversion is to only convert the
  46. lower 127 chars, all others are translated to '?'.
  47. These routines can be overwritten for the Current Locale
  48. }
  49. procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  50. var
  51. i : SizeInt;
  52. p : PAnsiChar;
  53. begin
  54. setlength(dest,len);
  55. p:=pointer(dest); {SetLength guarantees that dest is unique}
  56. for i:=1 to len do
  57. begin
  58. if word(source^)<256 then
  59. p^:=char(word(source^))
  60. else
  61. p^:='?';
  62. inc(source);
  63. inc(p);
  64. end;
  65. end;
  66. procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  67. var
  68. i : SizeInt;
  69. p : PUnicodeChar;
  70. begin
  71. setlength(dest,len);
  72. p:=pointer(dest); {SetLength guarantees that dest is unique}
  73. for i:=1 to len do
  74. begin
  75. p^:=unicodechar(byte(source^));
  76. inc(source);
  77. inc(p);
  78. end;
  79. end;
  80. function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  81. begin
  82. DefaultCharLengthPChar:=length(Str);
  83. end;
  84. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  85. begin
  86. if str[0]<>#0 then
  87. DefaultCodePointLength:=1
  88. else
  89. DefaultCodePointLength:=0;
  90. end;
  91. Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  92. begin
  93. manager:=widestringmanager;
  94. end;
  95. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  96. begin
  97. Old:=widestringmanager;
  98. widestringmanager:=New;
  99. end;
  100. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  101. begin
  102. widestringmanager:=New;
  103. end;
  104. Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  105. begin
  106. manager:=widestringmanager;
  107. end;
  108. Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  109. begin
  110. Old:=widestringmanager;
  111. widestringmanager:=New;
  112. end;
  113. Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  114. begin
  115. widestringmanager:=New;
  116. end;
  117. {****************************************************************************
  118. Internal functions, not in interface.
  119. ****************************************************************************}
  120. procedure UnicodeStringError;
  121. begin
  122. HandleErrorFrame(204,get_frame);
  123. end;
  124. {$ifdef UnicodeStrDebug}
  125. Procedure DumpUnicodeRec(S : Pointer);
  126. begin
  127. If S=Nil then
  128. Writeln ('String is nil')
  129. Else
  130. Begin
  131. With PUnicodeRec(S-UnicodeFirstOff)^ do
  132. begin
  133. Write ('(Len:',len);
  134. Writeln (' Ref: ',ref,')');
  135. end;
  136. end;
  137. end;
  138. {$endif}
  139. Function NewUnicodeString(Len : SizeInt) : Pointer;
  140. {
  141. Allocate a new UnicodeString on the heap.
  142. initialize it to zero length and reference count 1.
  143. }
  144. Var
  145. P : Pointer;
  146. begin
  147. GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
  148. If P<>Nil then
  149. begin
  150. PUnicodeRec(P)^.Len:=Len; { Initial length }
  151. PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
  152. PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
  153. PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);
  154. PUnicodeRec(P)^.First:=#0; { Terminating #0 }
  155. inc(p,UnicodeFirstOff); { Points to string now }
  156. end
  157. else
  158. UnicodeStringError;
  159. NewUnicodeString:=P;
  160. end;
  161. Procedure DisposeUnicodeString(Var S : Pointer);
  162. {
  163. Deallocates a UnicodeString From the heap.
  164. }
  165. begin
  166. If S=Nil then
  167. exit;
  168. Dec (S,UnicodeFirstOff);
  169. Freemem(S);
  170. S:=Nil;
  171. end;
  172. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  173. {
  174. Decreases the ReferenceCount of a non constant unicodestring;
  175. If the reference count is zero, deallocate the string;
  176. }
  177. Type
  178. pSizeInt = ^SizeInt;
  179. Var
  180. l : pSizeInt;
  181. Begin
  182. { Zero string }
  183. if S=Nil then
  184. exit;
  185. { check for constant strings ...}
  186. l:=@PUnicodeRec(S-UnicodeFirstOff)^.Ref;
  187. if l^<0 then
  188. exit;
  189. { declocked does a MT safe dec and returns true, if the counter is 0 }
  190. if declocked(l^) then
  191. { Ref count dropped to zero remove }
  192. DisposeUnicodeString(S);
  193. end;
  194. { alias for internal use }
  195. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  196. Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  197. Begin
  198. If S=Nil then
  199. exit;
  200. { constant string ? }
  201. If PUnicodeRec(S-UnicodeFirstOff)^.Ref<0 then
  202. exit;
  203. inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
  204. end;
  205. { alias for internal use }
  206. Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  207. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  208. function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
  209. {
  210. Converts a UnicodeString to a ShortString;
  211. }
  212. Var
  213. Size : SizeInt;
  214. temp : ansistring;
  215. begin
  216. result:='';
  217. Size:=Length(S2);
  218. if Size>0 then
  219. begin
  220. If Size>high_of_res then
  221. Size:=high_of_res;
  222. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,Size);
  223. result:=temp;
  224. end;
  225. end;
  226. {$else FPC_STRTOSHORTSTRINGPROC}
  227. procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  228. {
  229. Converts a UnicodeString to a ShortString;
  230. }
  231. Var
  232. Size : SizeInt;
  233. temp : ansistring;
  234. begin
  235. res:='';
  236. Size:=Length(S2);
  237. if Size>0 then
  238. begin
  239. If Size>high(res) then
  240. Size:=high(res);
  241. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
  242. res:=temp;
  243. end;
  244. end;
  245. {$endif FPC_STRTOSHORTSTRINGPROC}
  246. Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  247. {
  248. Converts a ShortString to a UnicodeString;
  249. }
  250. Var
  251. Size : SizeInt;
  252. begin
  253. result:='';
  254. Size:=Length(S2);
  255. if Size>0 then
  256. begin
  257. widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  258. { Terminating Zero }
  259. PUnicodeChar(Pointer(fpc_ShortStr_To_UnicodeStr)+Size*sizeof(UnicodeChar))^:=#0;
  260. end;
  261. end;
  262. Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  263. {
  264. Converts a UnicodeString to an AnsiString
  265. }
  266. Var
  267. Size : SizeInt;
  268. {$ifndef FPC_HAS_CPSTRING}
  269. cp : TSystemCodePage;
  270. {$endif FPC_HAS_CPSTRING}
  271. begin
  272. {$ifndef FPC_HAS_CPSTRING}
  273. cp:=$ffff;
  274. {$endif FPC_HAS_CPSTRING}
  275. result:='';
  276. Size:=Length(S2);
  277. if Size>0 then
  278. begin
  279. if cp=$ffff then
  280. cp:=DefaultSystemCodePage;
  281. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,Size);
  282. end;
  283. end;
  284. Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  285. {
  286. Converts an AnsiString to a UnicodeString;
  287. }
  288. Var
  289. Size : SizeInt;
  290. begin
  291. result:='';
  292. Size:=Length(S2);
  293. if Size>0 then
  294. widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),StringCodePage(S2),result,Size);
  295. end;
  296. Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  297. begin
  298. SetLength(Result,Length(S2));
  299. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  300. end;
  301. Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  302. begin
  303. SetLength(Result,Length(S2));
  304. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  305. end;
  306. Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  307. var
  308. Size : SizeInt;
  309. {$ifndef FPC_HAS_CPSTRING}
  310. cp : TSystemCodePage;
  311. {$endif FPC_HAS_CPSTRING}
  312. begin
  313. {$ifndef FPC_HAS_CPSTRING}
  314. cp:=$ffff;
  315. {$endif FPC_HAS_CPSTRING}
  316. result:='';
  317. if p=nil then
  318. exit;
  319. Size := IndexWord(p^, -1, 0);
  320. if Size>0 then
  321. widestringmanager.Unicode2AnsiMoveProc(P,result,cp,Size);
  322. end;
  323. Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  324. var
  325. Size : SizeInt;
  326. begin
  327. result:='';
  328. if p=nil then
  329. exit;
  330. Size := IndexWord(p^, -1, 0);
  331. Setlength(result,Size);
  332. if Size>0 then
  333. begin
  334. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  335. { Terminating Zero }
  336. PUnicodeChar(Pointer(result)+Size*sizeof(UnicodeChar))^:=#0;
  337. end;
  338. end;
  339. Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  340. var
  341. Size : SizeInt;
  342. begin
  343. result:='';
  344. if p=nil then
  345. exit;
  346. Size := IndexWord(p^, -1, 0);
  347. Setlength(result,Size);
  348. if Size>0 then
  349. begin
  350. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  351. { Terminating Zero }
  352. PUnicodeChar(Pointer(result)+Size*sizeof(UnicodeChar))^:=#0;
  353. end;
  354. end;
  355. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  356. Function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
  357. var
  358. Size : SizeInt;
  359. temp: ansistring;
  360. begin
  361. result:='';
  362. if p=nil then
  363. exit;
  364. Size := IndexWord(p^, $7fffffff, 0);
  365. if Size>0 then
  366. begin
  367. widestringmanager.Unicode2AnsiMoveProc(p,temp,Size);
  368. result:=temp;
  369. end;
  370. end;
  371. {$else FPC_STRTOSHORTSTRINGPROC}
  372. procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  373. var
  374. Size : SizeInt;
  375. temp: ansistring;
  376. begin
  377. res:='';
  378. if p=nil then
  379. exit;
  380. Size:=IndexWord(p^, high(PtrInt), 0);
  381. if Size>0 then
  382. begin
  383. widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  384. res:=temp;
  385. end;
  386. end;
  387. {$endif FPC_STRTOSHORTSTRINGPROC}
  388. Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  389. var
  390. Size : SizeInt;
  391. {$ifndef FPC_HAS_CPSTRING}
  392. cp : TSystemCodePage;
  393. {$endif FPC_HAS_CPSTRING}
  394. begin
  395. {$ifndef FPC_HAS_CPSTRING}
  396. cp:=$ffff;
  397. {$endif FPC_HAS_CPSTRING}
  398. result:='';
  399. if p=nil then
  400. exit;
  401. Size := IndexWord(p^, -1, 0);
  402. if Size>0 then
  403. widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
  404. end;
  405. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  406. Function fpc_PWideChar_To_ShortStr(const p : pwidechar): shortstring; compilerproc;
  407. var
  408. Size : SizeInt;
  409. temp: ansistring;
  410. begin
  411. result:='';
  412. if p=nil then
  413. exit;
  414. Size := IndexWord(p^, $7fffffff, 0);
  415. if Size>0 then
  416. begin
  417. widestringmanager.Wide2AnsiMoveProc(p,temp,Size);
  418. result:=temp;
  419. end;
  420. end;
  421. {$else FPC_STRTOSHORTSTRINGPROC}
  422. procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  423. var
  424. Size : SizeInt;
  425. temp: ansistring;
  426. begin
  427. res:='';
  428. if p=nil then
  429. exit;
  430. Size:=IndexWord(p^, high(PtrInt), 0);
  431. if Size>0 then
  432. begin
  433. widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  434. res:=temp;
  435. end;
  436. end;
  437. {$endif FPC_STRTOSHORTSTRINGPROC}
  438. { checked against the ansistring routine, 2001-05-27 (FK) }
  439. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  440. {
  441. Assigns S2 to S1 (S1:=S2), taking in account reference counts.
  442. }
  443. begin
  444. If S2<>nil then
  445. If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
  446. inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
  447. { Decrease the reference count on the old S1 }
  448. fpc_unicodestr_decr_ref (S1);
  449. s1:=s2;
  450. end;
  451. { alias for internal use }
  452. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  453. {$ifndef STR_CONCAT_PROCS}
  454. function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  455. Var
  456. Size,Location : SizeInt;
  457. pc : punicodechar;
  458. begin
  459. { only assign if s1 or s2 is empty }
  460. if (S1='') then
  461. begin
  462. result:=s2;
  463. exit;
  464. end;
  465. if (S2='') then
  466. begin
  467. result:=s1;
  468. exit;
  469. end;
  470. Location:=Length(S1);
  471. Size:=length(S2);
  472. SetLength(result,Size+Location);
  473. pc:=punicodechar(result);
  474. Move(S1[1],pc^,Location*sizeof(UnicodeChar));
  475. inc(pc,location);
  476. Move(S2[1],pc^,(Size+1)*sizeof(UnicodeChar));
  477. end;
  478. function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  479. Var
  480. i : Longint;
  481. p : pointer;
  482. pc : punicodechar;
  483. Size,NewSize : SizeInt;
  484. begin
  485. { First calculate size of the result so we can do
  486. a single call to SetLength() }
  487. NewSize:=0;
  488. for i:=low(sarr) to high(sarr) do
  489. inc(Newsize,length(sarr[i]));
  490. SetLength(result,NewSize);
  491. pc:=punicodechar(result);
  492. for i:=low(sarr) to high(sarr) do
  493. begin
  494. p:=pointer(sarr[i]);
  495. if assigned(p) then
  496. begin
  497. Size:=length(unicodestring(p));
  498. Move(punicodechar(p)^,pc^,(Size+1)*sizeof(UnicodeChar));
  499. inc(pc,size);
  500. end;
  501. end;
  502. end;
  503. {$else STR_CONCAT_PROCS}
  504. procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  505. Var
  506. Size,Location : SizeInt;
  507. same : boolean;
  508. begin
  509. { only assign if s1 or s2 is empty }
  510. if (S1='') then
  511. begin
  512. DestS:=s2;
  513. exit;
  514. end;
  515. if (S2='') then
  516. begin
  517. DestS:=s1;
  518. exit;
  519. end;
  520. Location:=Length(S1);
  521. Size:=length(S2);
  522. { Use Pointer() typecasts to prevent extra conversion code }
  523. if Pointer(DestS)=Pointer(S1) then
  524. begin
  525. same:=Pointer(S1)=Pointer(S2);
  526. SetLength(DestS,Size+Location);
  527. if same then
  528. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size)*sizeof(UnicodeChar))
  529. else
  530. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  531. end
  532. else if Pointer(DestS)=Pointer(S2) then
  533. begin
  534. SetLength(DestS,Size+Location);
  535. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  536. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  537. end
  538. else
  539. begin
  540. DestS:='';
  541. SetLength(DestS,Size+Location);
  542. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  543. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  544. end;
  545. end;
  546. procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  547. Var
  548. i : Longint;
  549. p,pc : pointer;
  550. Size,NewLen : SizeInt;
  551. lowstart : longint;
  552. destcopy : pointer;
  553. OldDestLen : SizeInt;
  554. begin
  555. if high(sarr)=0 then
  556. begin
  557. DestS:='';
  558. exit;
  559. end;
  560. destcopy:=nil;
  561. lowstart:=low(sarr);
  562. if Pointer(DestS)=Pointer(sarr[lowstart]) then
  563. inc(lowstart);
  564. { Check for another reuse, then we can't use
  565. the append optimization }
  566. for i:=lowstart to high(sarr) do
  567. begin
  568. if Pointer(DestS)=Pointer(sarr[i]) then
  569. begin
  570. { if DestS is used somewhere in the middle of the expression,
  571. we need to make sure the original string still exists after
  572. we empty/modify DestS.
  573. This trick only works with reference counted strings. Therefor
  574. this optimization is disabled for WINLIKEUNICODESTRING }
  575. destcopy:=pointer(dests);
  576. fpc_UnicodeStr_Incr_Ref(destcopy);
  577. lowstart:=low(sarr);
  578. break;
  579. end;
  580. end;
  581. { Start with empty DestS if we start with concatting
  582. the first array element }
  583. if lowstart=low(sarr) then
  584. DestS:='';
  585. OldDestLen:=length(DestS);
  586. { Calculate size of the result so we can do
  587. a single call to SetLength() }
  588. NewLen:=0;
  589. for i:=low(sarr) to high(sarr) do
  590. inc(NewLen,length(sarr[i]));
  591. SetLength(DestS,NewLen);
  592. { Concat all strings, except the string we already
  593. copied in DestS }
  594. pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
  595. for i:=lowstart to high(sarr) do
  596. begin
  597. p:=pointer(sarr[i]);
  598. if assigned(p) then
  599. begin
  600. Size:=length(unicodestring(p));
  601. Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
  602. inc(pc,size*sizeof(UnicodeChar));
  603. end;
  604. end;
  605. fpc_UnicodeStr_Decr_Ref(destcopy);
  606. end;
  607. {$endif STR_CONCAT_PROCS}
  608. Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  609. var
  610. w: unicodestring;
  611. begin
  612. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
  613. fpc_Char_To_UChar:=w[1];
  614. end;
  615. Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  616. {
  617. Converts a Char to a UnicodeString;
  618. }
  619. begin
  620. Setlength(fpc_Char_To_UnicodeStr,1);
  621. fpc_Char_To_UnicodeStr[1]:=c;
  622. { Terminating Zero }
  623. PUnicodeChar(Pointer(fpc_Char_To_UnicodeStr)+sizeof(UnicodeChar))^:=#0;
  624. end;
  625. Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  626. {
  627. Converts a UnicodeChar to a Char;
  628. }
  629. var
  630. s: ansistring;
  631. begin
  632. widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  633. if length(s)=1 then
  634. fpc_UChar_To_Char:= s[1]
  635. else
  636. fpc_UChar_To_Char:='?';
  637. end;
  638. Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  639. {
  640. Converts a WideChar to a UnicodeString;
  641. }
  642. begin
  643. Setlength (Result,1);
  644. Result[1]:= c;
  645. end;
  646. Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
  647. var
  648. w: widestring;
  649. begin
  650. widestringmanager.Ansi2WideMoveProc(@c,DefaultSystemCodePage,w,1);
  651. fpc_Char_To_WChar:=w[1];
  652. end;
  653. Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
  654. {
  655. Converts a WideChar to a Char;
  656. }
  657. var
  658. s: ansistring;
  659. begin
  660. widestringmanager.Wide2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  661. if length(s)=1 then
  662. fpc_WChar_To_Char:= s[1]
  663. else
  664. fpc_WChar_To_Char:='?';
  665. end;
  666. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  667. Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
  668. {
  669. Converts a WideChar to a ShortString;
  670. }
  671. var
  672. s: ansistring;
  673. begin
  674. widestringmanager.Wide2AnsiMoveProc(@c, s, 1);
  675. fpc_WChar_To_ShortStr:= s;
  676. end;
  677. {$else FPC_STRTOSHORTSTRINGPROC}
  678. procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  679. {
  680. Converts a WideChar to a ShortString;
  681. }
  682. var
  683. s: ansistring;
  684. begin
  685. widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  686. res:=s;
  687. end;
  688. {$endif FPC_STRTOSHORTSTRINGPROC}
  689. Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  690. {
  691. Converts a UnicodeChar to a UnicodeString;
  692. }
  693. begin
  694. Setlength (fpc_UChar_To_UnicodeStr,1);
  695. fpc_UChar_To_UnicodeStr[1]:= c;
  696. end;
  697. Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  698. {
  699. Converts a UnicodeChar to a AnsiString;
  700. }
  701. {$ifndef FPC_HAS_CPSTRING}
  702. var
  703. cp : TSystemCodePage;
  704. {$endif FPC_HAS_CPSTRING}
  705. begin
  706. {$ifndef FPC_HAS_CPSTRING}
  707. cp:=$ffff;
  708. {$endif FPC_HAS_CPSTRING}
  709. widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
  710. end;
  711. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  712. Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  713. {
  714. Converts a UnicodeChar to a ShortString;
  715. }
  716. var
  717. s: ansistring;
  718. begin
  719. widestringmanager.Unicode2AnsiMoveProc(@c, s, 1);
  720. fpc_UChar_To_ShortStr:= s;
  721. end;
  722. {$else FPC_STRTOSHORTSTRINGPROC}
  723. procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  724. {
  725. Converts a UnicodeChar to a ShortString;
  726. }
  727. var
  728. s: ansistring;
  729. begin
  730. widestringmanager.Unicode2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  731. res:=s;
  732. end;
  733. {$endif FPC_STRTOSHORTSTRINGPROC}
  734. Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  735. Var
  736. L : SizeInt;
  737. begin
  738. if (not assigned(p)) or (p[0]=#0) Then
  739. begin
  740. fpc_pchar_to_unicodestr := '';
  741. exit;
  742. end;
  743. l:=IndexChar(p^,-1,#0);
  744. widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,l);
  745. end;
  746. Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  747. var
  748. i : SizeInt;
  749. begin
  750. if zerobased then
  751. begin
  752. if arr[0]=#0 Then
  753. begin
  754. fpc_chararray_to_unicodestr:='';
  755. exit;
  756. end;
  757. i:=IndexChar(arr,high(arr)+1,#0);
  758. if i=-1 then
  759. i:=high(arr)+1;
  760. end
  761. else
  762. i:=high(arr)+1;
  763. SetLength(fpc_CharArray_To_UnicodeStr,i);
  764. widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
  765. end;
  766. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  767. function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  768. var
  769. l: longint;
  770. index: longint;
  771. len: byte;
  772. temp: ansistring;
  773. begin
  774. l := high(arr)+1;
  775. if l>=256 then
  776. l:=255
  777. else if l<0 then
  778. l:=0;
  779. if zerobased then
  780. begin
  781. index:=IndexWord(arr[0],l,0);
  782. if (index < 0) then
  783. len := l
  784. else
  785. len := index;
  786. end
  787. else
  788. len := l;
  789. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,len);
  790. fpc_UnicodeCharArray_To_ShortStr := temp;
  791. end;
  792. {$else FPC_STRTOSHORTSTRINGPROC}
  793. procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  794. var
  795. l: longint;
  796. index: ptrint;
  797. len: byte;
  798. temp: ansistring;
  799. begin
  800. l := high(arr)+1;
  801. if l>=high(res)+1 then
  802. l:=high(res)
  803. else if l<0 then
  804. l:=0;
  805. if zerobased then
  806. begin
  807. index:=IndexWord(arr[0],l,0);
  808. if index<0 then
  809. len:=l
  810. else
  811. len:=index;
  812. end
  813. else
  814. len:=l;
  815. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
  816. res:=temp;
  817. end;
  818. {$endif FPC_STRTOSHORTSTRINGPROC}
  819. Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING}zerobased: boolean = true): AnsiString; compilerproc;
  820. var
  821. i : SizeInt;
  822. {$ifndef FPC_HAS_CPSTRING}
  823. cp : TSystemCodePage;
  824. {$endif FPC_HAS_CPSTRING}
  825. begin
  826. {$ifndef FPC_HAS_CPSTRING}
  827. cp:=$ffff;
  828. {$endif FPC_HAS_CPSTRING}
  829. if (zerobased) then
  830. begin
  831. i:=IndexWord(arr,high(arr)+1,0);
  832. if i = -1 then
  833. i := high(arr)+1;
  834. end
  835. else
  836. i := high(arr)+1;
  837. SetLength(fpc_UnicodeCharArray_To_AnsiStr,i);
  838. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,cp,i);
  839. end;
  840. Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  841. var
  842. i : SizeInt;
  843. begin
  844. if (zerobased) then
  845. begin
  846. i:=IndexWord(arr,high(arr)+1,0);
  847. if i = -1 then
  848. i := high(arr)+1;
  849. end
  850. else
  851. i := high(arr)+1;
  852. SetLength(fpc_UnicodeCharArray_To_UnicodeStr,i);
  853. Move(arr[0], Pointer(fpc_UnicodeCharArray_To_UnicodeStr)^,i*sizeof(UnicodeChar));
  854. end;
  855. Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  856. var
  857. i : SizeInt;
  858. begin
  859. if (zerobased) then
  860. begin
  861. i:=IndexWord(arr,high(arr)+1,0);
  862. if i = -1 then
  863. i := high(arr)+1;
  864. end
  865. else
  866. i := high(arr)+1;
  867. SetLength(fpc_WideCharArray_To_UnicodeStr,i);
  868. Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,i*sizeof(WideChar));
  869. end;
  870. { due to their names, the following procedures should be in wstrings.inc,
  871. however, the compiler generates code using this functions on all platforms }
  872. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  873. function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  874. var
  875. l: longint;
  876. index: longint;
  877. len: byte;
  878. temp: ansistring;
  879. begin
  880. l := high(arr)+1;
  881. if l>=256 then
  882. l:=255
  883. else if l<0 then
  884. l:=0;
  885. if zerobased then
  886. begin
  887. index:=IndexWord(arr[0],l,0);
  888. if (index < 0) then
  889. len := l
  890. else
  891. len := index;
  892. end
  893. else
  894. len := l;
  895. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
  896. fpc_WideCharArray_To_ShortStr := temp;
  897. end;
  898. {$else FPC_STRTOSHORTSTRINGPROC}
  899. procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  900. var
  901. l: longint;
  902. index: ptrint;
  903. len: byte;
  904. temp: ansistring;
  905. begin
  906. l := high(arr)+1;
  907. if l>=high(res)+1 then
  908. l:=high(res)
  909. else if l<0 then
  910. l:=0;
  911. if zerobased then
  912. begin
  913. index:=IndexWord(arr[0],l,0);
  914. if index<0 then
  915. len:=l
  916. else
  917. len:=index;
  918. end
  919. else
  920. len:=l;
  921. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
  922. res:=temp;
  923. end;
  924. {$endif FPC_STRTOSHORTSTRINGPROC}
  925. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  926. var
  927. i : SizeInt;
  928. {$ifndef FPC_HAS_CPSTRING}
  929. cp : TSystemCodePage;
  930. {$endif FPC_HAS_CPSTRING}
  931. begin
  932. {$ifndef FPC_HAS_CPSTRING}
  933. cp:=$ffff;
  934. {$endif FPC_HAS_CPSTRING}
  935. if (zerobased) then
  936. begin
  937. i:=IndexWord(arr,high(arr)+1,0);
  938. if i = -1 then
  939. i := high(arr)+1;
  940. end
  941. else
  942. i := high(arr)+1;
  943. SetLength(fpc_WideCharArray_To_AnsiStr,i);
  944. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,cp,i);
  945. end;
  946. Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  947. var
  948. i : SizeInt;
  949. begin
  950. if (zerobased) then
  951. begin
  952. i:=IndexWord(arr,high(arr)+1,0);
  953. if i = -1 then
  954. i := high(arr)+1;
  955. end
  956. else
  957. i := high(arr)+1;
  958. SetLength(fpc_WideCharArray_To_WideStr,i);
  959. Move(arr[0], Pointer(fpc_WideCharArray_To_WideStr)^,i*sizeof(WideChar));
  960. end;
  961. {$ifndef FPC_STRTOCHARARRAYPROC}
  962. { inside the compiler, the resulttype is modified to that of the actual }
  963. { chararray we're converting to (JM) }
  964. function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
  965. var
  966. len: SizeInt;
  967. temp: ansistring;
  968. begin
  969. len := length(src);
  970. { make sure we don't dereference src if it can be nil (JM) }
  971. if len > 0 then
  972. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,len);
  973. len := length(temp);
  974. if len > arraysize then
  975. len := arraysize;
  976. {$r-}
  977. move(temp[1],fpc_unicodestr_to_chararray[0],len);
  978. fillchar(fpc_unicodestr_to_chararray[len],arraysize-len,0);
  979. {$ifdef RangeCheckWasOn}
  980. {$r+}
  981. {$endif}
  982. end;
  983. { inside the compiler, the resulttype is modified to that of the actual }
  984. { unicodechararray we're converting to (JM) }
  985. function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
  986. var
  987. len: SizeInt;
  988. begin
  989. len := length(src);
  990. if len > arraysize then
  991. len := arraysize;
  992. {$r-}
  993. { make sure we don't try to access element 1 of the ansistring if it's nil }
  994. if len > 0 then
  995. move(src[1],fpc_unicodestr_to_unicodechararray[0],len*SizeOf(UnicodeChar));
  996. fillchar(fpc_unicodestr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  997. {$ifdef RangeCheckWasOn}
  998. {$r+}
  999. {$endif}
  1000. end;
  1001. { inside the compiler, the resulttype is modified to that of the actual }
  1002. { chararray we're converting to (JM) }
  1003. function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
  1004. var
  1005. len: SizeInt;
  1006. temp: unicodestring;
  1007. begin
  1008. len := length(src);
  1009. { make sure we don't dereference src if it can be nil (JM) }
  1010. if len > 0 then
  1011. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  1012. len := length(temp);
  1013. if len > arraysize then
  1014. len := arraysize;
  1015. {$r-}
  1016. move(temp[1],fpc_ansistr_to_unicodechararray[0],len*sizeof(unicodechar));
  1017. fillchar(fpc_ansistr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  1018. {$ifdef RangeCheckWasOn}
  1019. {$r+}
  1020. {$endif}
  1021. end;
  1022. function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
  1023. var
  1024. len: longint;
  1025. temp : unicodestring;
  1026. begin
  1027. len := length(src);
  1028. { make sure we don't access char 1 if length is 0 (JM) }
  1029. if len > 0 then
  1030. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  1031. len := length(temp);
  1032. if len > arraysize then
  1033. len := arraysize;
  1034. {$r-}
  1035. move(temp[1],fpc_shortstr_to_unicodechararray[0],len*sizeof(unicodechar));
  1036. fillchar(fpc_shortstr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  1037. {$ifdef RangeCheckWasOn}
  1038. {$r+}
  1039. {$endif}
  1040. end;
  1041. {$else ndef FPC_STRTOCHARARRAYPROC}
  1042. procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  1043. var
  1044. len: SizeInt;
  1045. temp: ansistring;
  1046. begin
  1047. len := length(src);
  1048. { make sure we don't dereference src if it can be nil (JM) }
  1049. if len > 0 then
  1050. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
  1051. len := length(temp);
  1052. if len > length(res) then
  1053. len := length(res);
  1054. {$r-}
  1055. move(temp[1],res[0],len);
  1056. fillchar(res[len],length(res)-len,0);
  1057. {$ifdef RangeCheckWasOn}
  1058. {$r+}
  1059. {$endif}
  1060. end;
  1061. procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  1062. var
  1063. len: SizeInt;
  1064. begin
  1065. len := length(src);
  1066. if len > length(res) then
  1067. len := length(res);
  1068. {$r-}
  1069. { make sure we don't try to access element 1 of the ansistring if it's nil }
  1070. if len > 0 then
  1071. move(src[1],res[0],len*SizeOf(UnicodeChar));
  1072. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  1073. {$ifdef RangeCheckWasOn}
  1074. {$r+}
  1075. {$endif}
  1076. end;
  1077. procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  1078. var
  1079. len: SizeInt;
  1080. temp: unicodestring;
  1081. begin
  1082. len := length(src);
  1083. { make sure we don't dereference src if it can be nil (JM) }
  1084. if len > 0 then
  1085. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  1086. len := length(temp);
  1087. if len > length(res) then
  1088. len := length(res);
  1089. {$r-}
  1090. move(temp[1],res[0],len*sizeof(unicodechar));
  1091. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  1092. {$ifdef RangeCheckWasOn}
  1093. {$r+}
  1094. {$endif}
  1095. end;
  1096. procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  1097. var
  1098. len: longint;
  1099. temp : unicodestring;
  1100. begin
  1101. len := length(src);
  1102. { make sure we don't access char 1 if length is 0 (JM) }
  1103. if len > 0 then
  1104. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  1105. len := length(temp);
  1106. if len > length(res) then
  1107. len := length(res);
  1108. {$r-}
  1109. move(temp[1],res[0],len*sizeof(unicodechar));
  1110. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  1111. {$ifdef RangeCheckWasOn}
  1112. {$r+}
  1113. {$endif}
  1114. end;
  1115. procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: AnsiString); compilerproc;
  1116. var
  1117. len: SizeInt;
  1118. temp: widestring;
  1119. begin
  1120. len := length(src);
  1121. { make sure we don't dereference src if it can be nil (JM) }
  1122. if len > 0 then
  1123. widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),temp,len);
  1124. len := length(temp);
  1125. if len > length(res) then
  1126. len := length(res);
  1127. {$r-}
  1128. move(temp[1],res[0],len*sizeof(widechar));
  1129. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  1130. {$ifdef RangeCheckWasOn}
  1131. {$r+}
  1132. {$endif}
  1133. end;
  1134. procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  1135. var
  1136. len: longint;
  1137. temp : widestring;
  1138. begin
  1139. len := length(src);
  1140. { make sure we don't access char 1 if length is 0 (JM) }
  1141. if len > 0 then
  1142. widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  1143. len := length(temp);
  1144. if len > length(res) then
  1145. len := length(res);
  1146. {$r-}
  1147. move(temp[1],res[0],len*sizeof(widechar));
  1148. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  1149. {$ifdef RangeCheckWasOn}
  1150. {$r+}
  1151. {$endif}
  1152. end;
  1153. procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  1154. var
  1155. len: SizeInt;
  1156. begin
  1157. len := length(src);
  1158. if len > length(res) then
  1159. len := length(res);
  1160. {$r-}
  1161. { make sure we don't try to access element 1 of the widestring if it's nil }
  1162. if len > 0 then
  1163. move(src[1],res[0],len*SizeOf(WideChar));
  1164. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  1165. {$ifdef RangeCheckWasOn}
  1166. {$r+}
  1167. {$endif}
  1168. end;
  1169. {$endif ndef FPC_STRTOCHARARRAYPROC}
  1170. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  1171. {
  1172. Compares 2 UnicodeStrings;
  1173. The result is
  1174. <0 if S1<S2
  1175. 0 if S1=S2
  1176. >0 if S1>S2
  1177. }
  1178. Var
  1179. MaxI,Temp : SizeInt;
  1180. begin
  1181. if pointer(S1)=pointer(S2) then
  1182. begin
  1183. fpc_UnicodeStr_Compare:=0;
  1184. exit;
  1185. end;
  1186. Maxi:=Length(S1);
  1187. temp:=Length(S2);
  1188. If MaxI>Temp then
  1189. MaxI:=Temp;
  1190. Temp:=CompareWord(S1[1],S2[1],MaxI);
  1191. if temp=0 then
  1192. temp:=Length(S1)-Length(S2);
  1193. fpc_UnicodeStr_Compare:=Temp;
  1194. end;
  1195. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  1196. {
  1197. Compares 2 UnicodeStrings for equality only;
  1198. The result is
  1199. 0 if S1=S2
  1200. <>0 if S1<>S2
  1201. }
  1202. Var
  1203. MaxI : SizeInt;
  1204. begin
  1205. if pointer(S1)=pointer(S2) then
  1206. exit(0);
  1207. Maxi:=Length(S1);
  1208. If MaxI<>Length(S2) then
  1209. exit(-1)
  1210. else
  1211. exit(CompareWord(S1[1],S2[1],MaxI));
  1212. end;
  1213. {$ifdef VER2_4}
  1214. // obsolete but needed for bootstrapping with 2.4
  1215. Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  1216. begin
  1217. if p=nil then
  1218. HandleErrorFrame(201,get_frame);
  1219. end;
  1220. Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  1221. begin
  1222. if (index>len div 2) or (Index<1) then
  1223. HandleErrorFrame(201,get_frame);
  1224. end;
  1225. {$else VER2_4}
  1226. Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  1227. begin
  1228. if (p=nil) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len div 2) or (Index<1) then
  1229. HandleErrorFrame(201,get_frame);
  1230. end;
  1231. {$endif VER2_4}
  1232. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  1233. {
  1234. Sets The length of string S to L.
  1235. Makes sure S is unique, and contains enough room.
  1236. }
  1237. Var
  1238. Temp : Pointer;
  1239. movelen: SizeInt;
  1240. begin
  1241. if (l>0) then
  1242. begin
  1243. if Pointer(S)=nil then
  1244. begin
  1245. { Need a complete new string...}
  1246. Pointer(s):=NewUnicodeString(l);
  1247. end
  1248. else
  1249. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  1250. begin
  1251. Dec(Pointer(S),UnicodeFirstOff);
  1252. if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
  1253. reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
  1254. Inc(Pointer(S), UnicodeFirstOff);
  1255. end
  1256. else
  1257. begin
  1258. { Reallocation is needed... }
  1259. Temp:=Pointer(NewUnicodeString(L));
  1260. if Length(S)>0 then
  1261. begin
  1262. if l < succ(length(s)) then
  1263. movelen := l
  1264. { also move terminating null }
  1265. else
  1266. movelen := succ(length(s));
  1267. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  1268. end;
  1269. fpc_unicodestr_decr_ref(Pointer(S));
  1270. Pointer(S):=Temp;
  1271. end;
  1272. { Force nil termination in case it gets shorter }
  1273. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  1274. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  1275. end
  1276. else
  1277. begin
  1278. { Length=0 }
  1279. if Pointer(S)<>nil then
  1280. fpc_unicodestr_decr_ref (Pointer(S));
  1281. Pointer(S):=Nil;
  1282. end;
  1283. end;
  1284. {*****************************************************************************
  1285. Public functions, In interface.
  1286. *****************************************************************************}
  1287. function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  1288. begin
  1289. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  1290. end;
  1291. function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  1292. var
  1293. temp:unicodestring;
  1294. begin
  1295. widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  1296. if Length(temp)<DestSize then
  1297. move(temp[1],Dest^,Length(temp)*SizeOf(UnicodeChar))
  1298. else
  1299. move(temp[1],Dest^,(DestSize-1)*SizeOf(UnicodeChar));
  1300. Dest[DestSize-1]:=#0;
  1301. result:=Dest;
  1302. end;
  1303. function WideCharToString(S : PWideChar) : UnicodeString;
  1304. begin
  1305. result:=WideCharLenToString(s,Length(WideString(s)));
  1306. end;
  1307. function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  1308. var
  1309. temp:widestring;
  1310. begin
  1311. widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  1312. if Length(temp)<DestSize then
  1313. move(temp[1],Dest^,Length(temp)*SizeOf(WideChar))
  1314. else
  1315. move(temp[1],Dest^,(DestSize-1)*SizeOf(WideChar));
  1316. Dest[DestSize-1]:=#0;
  1317. result:=Dest;
  1318. end;
  1319. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  1320. begin
  1321. SetLength(result,Len);
  1322. Move(S^,Pointer(Result)^,Len*2);
  1323. end;
  1324. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  1325. begin
  1326. Dest:=UnicodeCharLenToString(Src,Len);
  1327. end;
  1328. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  1329. begin
  1330. Dest:=UnicodeCharLenToString(Src,Len);
  1331. end;
  1332. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  1333. begin
  1334. Dest:=UnicodeCharToString(S);
  1335. end;
  1336. function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  1337. begin
  1338. SetLength(result,Len);
  1339. Move(S^,Pointer(Result)^,Len*2);
  1340. end;
  1341. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  1342. begin
  1343. Dest:=WideCharLenToString(Src,Len);
  1344. end;
  1345. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  1346. begin
  1347. Dest:=WideCharLenToString(Src,Len);
  1348. end;
  1349. procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  1350. begin
  1351. Dest:=WideCharToString(S);
  1352. end;
  1353. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  1354. begin
  1355. Dest:=WideCharToString(S);
  1356. end;
  1357. Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  1358. {
  1359. Make sure reference count of S is 1,
  1360. using copy-on-write semantics.
  1361. }
  1362. Var
  1363. SNew : Pointer;
  1364. L : SizeInt;
  1365. begin
  1366. pointer(result) := pointer(s);
  1367. If Pointer(S)=Nil then
  1368. exit;
  1369. if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
  1370. begin
  1371. L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len div sizeof(UnicodeChar);
  1372. SNew:=NewUnicodeString (L);
  1373. Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
  1374. PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L * sizeof(UnicodeChar);
  1375. fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
  1376. pointer(S):=SNew;
  1377. pointer(result):=SNew;
  1378. end;
  1379. end;
  1380. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  1381. var
  1382. ResultAddress : Pointer;
  1383. begin
  1384. ResultAddress:=Nil;
  1385. dec(index);
  1386. if Index < 0 then
  1387. Index := 0;
  1388. { Check Size. Accounts for Zero-length S, the double check is needed because
  1389. Size can be maxint and will get <0 when adding index }
  1390. if (Size>Length(S)) or
  1391. (Index+Size>Length(S)) then
  1392. Size:=Length(S)-Index;
  1393. If Size>0 then
  1394. begin
  1395. If Index<0 Then
  1396. Index:=0;
  1397. ResultAddress:=Pointer(NewUnicodeString (Size));
  1398. if ResultAddress<>Nil then
  1399. begin
  1400. Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
  1401. PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size*sizeof(UnicodeChar);
  1402. PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
  1403. end;
  1404. end;
  1405. fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
  1406. Pointer(fpc_unicodestr_Copy):=ResultAddress;
  1407. end;
  1408. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  1409. var
  1410. i,MaxLen : SizeInt;
  1411. pc : punicodechar;
  1412. begin
  1413. Pos:=0;
  1414. if Length(SubStr)>0 then
  1415. begin
  1416. MaxLen:=Length(source)-Length(SubStr);
  1417. i:=0;
  1418. pc:=@source[1];
  1419. while (i<=MaxLen) do
  1420. begin
  1421. inc(i);
  1422. if (SubStr[1]=pc^) and
  1423. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  1424. begin
  1425. Pos:=i;
  1426. exit;
  1427. end;
  1428. inc(pc);
  1429. end;
  1430. end;
  1431. end;
  1432. { Faster version for a unicodechar alone }
  1433. Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  1434. var
  1435. i: SizeInt;
  1436. pc : punicodechar;
  1437. begin
  1438. pc:=@s[1];
  1439. for i:=1 to length(s) do
  1440. begin
  1441. if pc^=c then
  1442. begin
  1443. pos:=i;
  1444. exit;
  1445. end;
  1446. inc(pc);
  1447. end;
  1448. pos:=0;
  1449. end;
  1450. Function Pos (c : RawByteString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1451. begin
  1452. result:=Pos(UnicodeString(c),s);
  1453. end;
  1454. Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1455. begin
  1456. result:=Pos(UnicodeString(c),s);
  1457. end;
  1458. Function Pos (c : UnicodeString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1459. begin
  1460. result:=Pos(c,UnicodeString(s));
  1461. end;
  1462. { Faster version for a char alone. Must be implemented because }
  1463. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1464. { using pos(char,pchar) will always call the shortstring version }
  1465. { (exact match for first argument), also with $h+ (JM) }
  1466. Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  1467. var
  1468. i: SizeInt;
  1469. wc : unicodechar;
  1470. pc : punicodechar;
  1471. begin
  1472. wc:=c;
  1473. pc:=@s[1];
  1474. for i:=1 to length(s) do
  1475. begin
  1476. if pc^=wc then
  1477. begin
  1478. pos:=i;
  1479. exit;
  1480. end;
  1481. inc(pc);
  1482. end;
  1483. pos:=0;
  1484. end;
  1485. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  1486. Var
  1487. LS : SizeInt;
  1488. begin
  1489. LS:=Length(S);
  1490. if (Index>LS) or (Index<=0) or (Size<=0) then
  1491. exit;
  1492. UniqueString (S);
  1493. { (Size+Index) will overflow if Size=MaxInt. }
  1494. if Size>LS-Index then
  1495. Size:=LS-Index+1;
  1496. if Size<=LS-Index then
  1497. begin
  1498. Dec(Index);
  1499. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  1500. end;
  1501. Setlength(s,LS-Size);
  1502. end;
  1503. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  1504. var
  1505. Temp : UnicodeString;
  1506. LS : SizeInt;
  1507. begin
  1508. If Length(Source)=0 then
  1509. exit;
  1510. if index <= 0 then
  1511. index := 1;
  1512. Ls:=Length(S);
  1513. if index > LS then
  1514. index := LS+1;
  1515. Dec(Index);
  1516. Pointer(Temp) := NewUnicodeString(Length(Source)+LS);
  1517. SetLength(Temp,Length(Source)+LS);
  1518. If Index>0 then
  1519. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  1520. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  1521. If (LS-Index)>0 then
  1522. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  1523. S:=Temp;
  1524. end;
  1525. Function UpCase(c:UnicodeChar):UnicodeChar;
  1526. var
  1527. s : UnicodeString;
  1528. begin
  1529. s:=c;
  1530. result:=widestringmanager.UpperUnicodeStringProc(s)[1];
  1531. end;
  1532. function UpCase(const s : UnicodeString) : UnicodeString;
  1533. begin
  1534. result:=widestringmanager.UpperUnicodeStringProc(s);
  1535. end;
  1536. Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  1537. begin
  1538. SetLength(S,Len);
  1539. If (Buf<>Nil) and (Len>0) then
  1540. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  1541. end;
  1542. Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  1543. var
  1544. BufLen: SizeInt;
  1545. begin
  1546. SetLength(S,Len);
  1547. If (Buf<>Nil) and (Len>0) then
  1548. begin
  1549. BufLen := IndexByte(Buf^, Len+1, 0);
  1550. If (BufLen>0) and (BufLen < Len) then
  1551. Len := BufLen;
  1552. widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  1553. //PUnicodeChar(Pointer(S)+Len*sizeof(UnicodeChar))^:=#0;
  1554. end;
  1555. end;
  1556. {$ifndef FPUNONE}
  1557. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  1558. Var
  1559. SS : String;
  1560. begin
  1561. fpc_Val_Real_UnicodeStr := 0;
  1562. if length(S) > 255 then
  1563. code := 256
  1564. else
  1565. begin
  1566. SS := S;
  1567. Val(SS,fpc_Val_Real_UnicodeStr,code);
  1568. end;
  1569. end;
  1570. {$endif}
  1571. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  1572. var ss:shortstring;
  1573. begin
  1574. if length(s)>255 then
  1575. code:=256
  1576. else
  1577. begin
  1578. ss:=s;
  1579. val(ss,fpc_val_enum_unicodestr,code);
  1580. end;
  1581. end;
  1582. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  1583. Var
  1584. SS : String;
  1585. begin
  1586. if length(S) > 255 then
  1587. begin
  1588. fpc_Val_Currency_UnicodeStr:=0;
  1589. code := 256;
  1590. end
  1591. else
  1592. begin
  1593. SS := S;
  1594. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  1595. end;
  1596. end;
  1597. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  1598. Var
  1599. SS : ShortString;
  1600. begin
  1601. fpc_Val_UInt_UnicodeStr := 0;
  1602. if length(S) > 255 then
  1603. code := 256
  1604. else
  1605. begin
  1606. SS := S;
  1607. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  1608. end;
  1609. end;
  1610. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  1611. Var
  1612. SS : ShortString;
  1613. begin
  1614. fpc_Val_SInt_UnicodeStr:=0;
  1615. if length(S)>255 then
  1616. code:=256
  1617. else
  1618. begin
  1619. SS := S;
  1620. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  1621. end;
  1622. end;
  1623. {$ifndef CPU64}
  1624. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1625. Var
  1626. SS : ShortString;
  1627. begin
  1628. fpc_Val_qword_UnicodeStr:=0;
  1629. if length(S)>255 then
  1630. code:=256
  1631. else
  1632. begin
  1633. SS := S;
  1634. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1635. end;
  1636. end;
  1637. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1638. Var
  1639. SS : ShortString;
  1640. begin
  1641. fpc_Val_int64_UnicodeStr:=0;
  1642. if length(S)>255 then
  1643. code:=256
  1644. else
  1645. begin
  1646. SS := S;
  1647. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1648. end;
  1649. end;
  1650. {$endif CPU64}
  1651. {$ifndef FPUNONE}
  1652. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1653. var
  1654. ss : shortstring;
  1655. begin
  1656. str_real(len,fr,d,treal_type(rt),ss);
  1657. s:=ss;
  1658. end;
  1659. {$endif}
  1660. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1661. var ss:shortstring;
  1662. begin
  1663. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1664. s:=ss;
  1665. end;
  1666. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1667. var ss:shortstring;
  1668. begin
  1669. fpc_shortstr_bool(b,len,ss);
  1670. s:=ss;
  1671. end;
  1672. {$ifdef FPC_HAS_STR_CURRENCY}
  1673. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1674. var
  1675. ss : shortstring;
  1676. begin
  1677. str(c:len:fr,ss);
  1678. s:=ss;
  1679. end;
  1680. {$endif FPC_HAS_STR_CURRENCY}
  1681. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1682. Var
  1683. SS : ShortString;
  1684. begin
  1685. Str (v:Len,SS);
  1686. S:=SS;
  1687. end;
  1688. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1689. Var
  1690. SS : ShortString;
  1691. begin
  1692. str(v:Len,SS);
  1693. S:=SS;
  1694. end;
  1695. {$ifndef CPU64}
  1696. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1697. Var
  1698. SS : ShortString;
  1699. begin
  1700. Str (v:Len,SS);
  1701. S:=SS;
  1702. end;
  1703. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1704. Var
  1705. SS : ShortString;
  1706. begin
  1707. str(v:Len,SS);
  1708. S:=SS;
  1709. end;
  1710. {$endif CPU64}
  1711. { converts an utf-16 code point or surrogate pair to utf-32 }
  1712. function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  1713. var
  1714. w: unicodechar;
  1715. begin
  1716. { UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
  1717. { are the same in UTF-32 }
  1718. w:=s[index];
  1719. if (w<=#$d7ff) or
  1720. (w>=#$e000) then
  1721. begin
  1722. result:=UCS4Char(w);
  1723. len:=1;
  1724. end
  1725. { valid surrogate pair? }
  1726. else if (w<=#$dbff) and
  1727. { w>=#$d7ff check not needed, checked above }
  1728. (index<length(s)) and
  1729. (s[index+1]>=#$dc00) and
  1730. (s[index+1]<=#$dfff) then
  1731. { convert the surrogate pair to UTF-32 }
  1732. begin
  1733. result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
  1734. len:=2;
  1735. end
  1736. else
  1737. { invalid surrogate -> do nothing }
  1738. begin
  1739. result:=UCS4Char(w);
  1740. len:=1;
  1741. end;
  1742. end;
  1743. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1744. begin
  1745. if assigned(Source) then
  1746. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  1747. else
  1748. Result:=0;
  1749. end;
  1750. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1751. var
  1752. i,j : SizeUInt;
  1753. w : word;
  1754. lw : longword;
  1755. len : longint;
  1756. begin
  1757. result:=0;
  1758. if source=nil then
  1759. exit;
  1760. i:=0;
  1761. j:=0;
  1762. if assigned(Dest) then
  1763. begin
  1764. while (i<SourceChars) and (j<MaxDestBytes) do
  1765. begin
  1766. w:=word(Source[i]);
  1767. case w of
  1768. 0..$7f:
  1769. begin
  1770. Dest[j]:=char(w);
  1771. inc(j);
  1772. end;
  1773. $80..$7ff:
  1774. begin
  1775. if j+1>=MaxDestBytes then
  1776. break;
  1777. Dest[j]:=char($c0 or (w shr 6));
  1778. Dest[j+1]:=char($80 or (w and $3f));
  1779. inc(j,2);
  1780. end;
  1781. $800..$d7ff,$e000..$ffff:
  1782. begin
  1783. if j+2>=MaxDestBytes then
  1784. break;
  1785. Dest[j]:=char($e0 or (w shr 12));
  1786. Dest[j+1]:=char($80 or ((w shr 6) and $3f));
  1787. Dest[j+2]:=char($80 or (w and $3f));
  1788. inc(j,3);
  1789. end;
  1790. $d800..$dbff:
  1791. {High Surrogates}
  1792. begin
  1793. if j+3>=MaxDestBytes then
  1794. break;
  1795. if (i<sourcechars-1) and
  1796. (word(Source[i+1]) >= $dc00) and
  1797. (word(Source[i+1]) <= $dfff) then
  1798. begin
  1799. lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
  1800. Dest[j]:=char($f0 or (lw shr 18));
  1801. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1802. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1803. Dest[j+3]:=char($80 or (lw and $3f));
  1804. inc(j,4);
  1805. inc(i);
  1806. end;
  1807. end;
  1808. end;
  1809. inc(i);
  1810. end;
  1811. if j>SizeUInt(MaxDestBytes-1) then
  1812. j:=MaxDestBytes-1;
  1813. Dest[j]:=#0;
  1814. end
  1815. else
  1816. begin
  1817. while i<SourceChars do
  1818. begin
  1819. case word(Source[i]) of
  1820. $0..$7f:
  1821. inc(j);
  1822. $80..$7ff:
  1823. inc(j,2);
  1824. $800..$d7ff,$e000..$ffff:
  1825. inc(j,3);
  1826. $d800..$dbff:
  1827. begin
  1828. if (i<sourcechars-1) and
  1829. (word(Source[i+1]) >= $dc00) and
  1830. (word(Source[i+1]) <= $dfff) then
  1831. begin
  1832. inc(j,4);
  1833. inc(i);
  1834. end;
  1835. end;
  1836. end;
  1837. inc(i);
  1838. end;
  1839. end;
  1840. result:=j+1;
  1841. end;
  1842. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1843. begin
  1844. if assigned(Source) then
  1845. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1846. else
  1847. Result:=0;
  1848. end;
  1849. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1850. const
  1851. UNICODE_INVALID=63;
  1852. var
  1853. InputUTF8: SizeUInt;
  1854. IBYTE: BYTE;
  1855. OutputUnicode: SizeUInt;
  1856. PRECHAR: SizeUInt;
  1857. TempBYTE: BYTE;
  1858. CharLen: SizeUint;
  1859. LookAhead: SizeUInt;
  1860. UC: SizeUInt;
  1861. begin
  1862. if not assigned(Source) then
  1863. begin
  1864. result:=0;
  1865. exit;
  1866. end;
  1867. result:=SizeUInt(-1);
  1868. InputUTF8:=0;
  1869. OutputUnicode:=0;
  1870. PreChar:=0;
  1871. if Assigned(Dest) Then
  1872. begin
  1873. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1874. begin
  1875. IBYTE:=byte(Source[InputUTF8]);
  1876. if (IBYTE and $80) = 0 then
  1877. begin
  1878. //One character US-ASCII, convert it to unicode
  1879. if IBYTE = 10 then
  1880. begin
  1881. If (PreChar<>13) and FALSE then
  1882. begin
  1883. //Expand to crlf, conform UTF-8.
  1884. //This procedure will break the memory alocation by
  1885. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1886. if OutputUnicode+1<MaxDestChars then
  1887. begin
  1888. Dest[OutputUnicode]:=WideChar(13);
  1889. inc(OutputUnicode);
  1890. Dest[OutputUnicode]:=WideChar(10);
  1891. inc(OutputUnicode);
  1892. PreChar:=10;
  1893. end
  1894. else
  1895. begin
  1896. Dest[OutputUnicode]:=WideChar(13);
  1897. inc(OutputUnicode);
  1898. end;
  1899. end
  1900. else
  1901. begin
  1902. Dest[OutputUnicode]:=WideChar(IBYTE);
  1903. inc(OutputUnicode);
  1904. PreChar:=IBYTE;
  1905. end;
  1906. end
  1907. else
  1908. begin
  1909. Dest[OutputUnicode]:=WideChar(IBYTE);
  1910. inc(OutputUnicode);
  1911. PreChar:=IBYTE;
  1912. end;
  1913. inc(InputUTF8);
  1914. end
  1915. else
  1916. begin
  1917. TempByte:=IBYTE;
  1918. CharLen:=0;
  1919. while (TempBYTE and $80)<>0 do
  1920. begin
  1921. TempBYTE:=(TempBYTE shl 1) and $FE;
  1922. inc(CharLen);
  1923. end;
  1924. //Test for the "CharLen" conforms UTF-8 string
  1925. //This means the 10xxxxxx pattern.
  1926. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1927. begin
  1928. //Insuficient chars in string to decode
  1929. //UTF-8 array. Fallback to single char.
  1930. CharLen:= 1;
  1931. end;
  1932. for LookAhead := 1 to CharLen-1 do
  1933. begin
  1934. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1935. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1936. begin
  1937. //Invalid UTF-8 sequence, fallback.
  1938. CharLen:= LookAhead;
  1939. break;
  1940. end;
  1941. end;
  1942. UC:=$FFFF;
  1943. case CharLen of
  1944. 1: begin
  1945. //Not valid UTF-8 sequence
  1946. UC:=UNICODE_INVALID;
  1947. end;
  1948. 2: begin
  1949. //Two bytes UTF, convert it
  1950. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1951. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1952. if UC <= $7F then
  1953. begin
  1954. //Invalid UTF sequence.
  1955. UC:=UNICODE_INVALID;
  1956. end;
  1957. end;
  1958. 3: begin
  1959. //Three bytes, convert it to unicode
  1960. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1961. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1962. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1963. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1964. begin
  1965. //Invalid UTF-8 sequence
  1966. UC:= UNICODE_INVALID;
  1967. End;
  1968. end;
  1969. 4: begin
  1970. //Four bytes, convert it to two unicode characters
  1971. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1972. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1973. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1974. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1975. if (UC < $10000) or (UC > $10FFFF) then
  1976. begin
  1977. UC:= UNICODE_INVALID;
  1978. end
  1979. else
  1980. begin
  1981. { only store pair if room }
  1982. dec(UC,$10000);
  1983. if (OutputUnicode<MaxDestChars-1) then
  1984. begin
  1985. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1986. inc(OutputUnicode);
  1987. UC:=(UC and $3ff) + $DC00;
  1988. end
  1989. else
  1990. begin
  1991. InputUTF8:= InputUTF8 + CharLen;
  1992. { don't store anything }
  1993. CharLen:=0;
  1994. end;
  1995. end;
  1996. end;
  1997. 5,6,7: begin
  1998. //Invalid UTF8 to unicode conversion,
  1999. //mask it as invalid UNICODE too.
  2000. UC:=UNICODE_INVALID;
  2001. end;
  2002. end;
  2003. if CharLen > 0 then
  2004. begin
  2005. PreChar:=UC;
  2006. Dest[OutputUnicode]:=WideChar(UC);
  2007. inc(OutputUnicode);
  2008. end;
  2009. InputUTF8:= InputUTF8 + CharLen;
  2010. end;
  2011. end;
  2012. Result:=OutputUnicode+1;
  2013. end
  2014. else
  2015. begin
  2016. while (InputUTF8<SourceBytes) do
  2017. begin
  2018. IBYTE:=byte(Source[InputUTF8]);
  2019. if (IBYTE and $80) = 0 then
  2020. begin
  2021. //One character US-ASCII, convert it to unicode
  2022. if IBYTE = 10 then
  2023. begin
  2024. if (PreChar<>13) and FALSE then
  2025. begin
  2026. //Expand to crlf, conform UTF-8.
  2027. //This procedure will break the memory alocation by
  2028. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  2029. inc(OutputUnicode,2);
  2030. PreChar:=10;
  2031. end
  2032. else
  2033. begin
  2034. inc(OutputUnicode);
  2035. PreChar:=IBYTE;
  2036. end;
  2037. end
  2038. else
  2039. begin
  2040. inc(OutputUnicode);
  2041. PreChar:=IBYTE;
  2042. end;
  2043. inc(InputUTF8);
  2044. end
  2045. else
  2046. begin
  2047. TempByte:=IBYTE;
  2048. CharLen:=0;
  2049. while (TempBYTE and $80)<>0 do
  2050. begin
  2051. TempBYTE:=(TempBYTE shl 1) and $FE;
  2052. inc(CharLen);
  2053. end;
  2054. //Test for the "CharLen" conforms UTF-8 string
  2055. //This means the 10xxxxxx pattern.
  2056. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  2057. begin
  2058. //Insuficient chars in string to decode
  2059. //UTF-8 array. Fallback to single char.
  2060. CharLen:= 1;
  2061. end;
  2062. for LookAhead := 1 to CharLen-1 do
  2063. begin
  2064. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  2065. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  2066. begin
  2067. //Invalid UTF-8 sequence, fallback.
  2068. CharLen:= LookAhead;
  2069. break;
  2070. end;
  2071. end;
  2072. UC:=$FFFF;
  2073. case CharLen of
  2074. 1: begin
  2075. //Not valid UTF-8 sequence
  2076. UC:=UNICODE_INVALID;
  2077. end;
  2078. 2: begin
  2079. //Two bytes UTF, convert it
  2080. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  2081. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  2082. if UC <= $7F then
  2083. begin
  2084. //Invalid UTF sequence.
  2085. UC:=UNICODE_INVALID;
  2086. end;
  2087. end;
  2088. 3: begin
  2089. //Three bytes, convert it to unicode
  2090. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  2091. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  2092. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  2093. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  2094. begin
  2095. //Invalid UTF-8 sequence
  2096. UC:= UNICODE_INVALID;
  2097. end;
  2098. end;
  2099. 4: begin
  2100. //Four bytes, convert it to two unicode characters
  2101. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  2102. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  2103. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  2104. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  2105. if (UC < $10000) or (UC > $10FFFF) then
  2106. UC:= UNICODE_INVALID
  2107. else
  2108. { extra character character }
  2109. inc(OutputUnicode);
  2110. end;
  2111. 5,6,7: begin
  2112. //Invalid UTF8 to unicode conversion,
  2113. //mask it as invalid UNICODE too.
  2114. UC:=UNICODE_INVALID;
  2115. end;
  2116. end;
  2117. if CharLen > 0 then
  2118. begin
  2119. PreChar:=UC;
  2120. inc(OutputUnicode);
  2121. end;
  2122. InputUTF8:= InputUTF8 + CharLen;
  2123. end;
  2124. end;
  2125. Result:=OutputUnicode+1;
  2126. end;
  2127. end;
  2128. function UTF8Encode(const s : RawByteString) : UTF8String; inline;
  2129. begin
  2130. Result:=UTF8Encode(UnicodeString(s));
  2131. end;
  2132. function UTF8Encode(const s : UnicodeString) : UTF8String;
  2133. var
  2134. i : SizeInt;
  2135. hs : UTF8String;
  2136. begin
  2137. result:='';
  2138. if s='' then
  2139. exit;
  2140. SetLength(hs,length(s)*3);
  2141. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  2142. if i>0 then
  2143. begin
  2144. SetLength(hs,i-1);
  2145. result:=hs;
  2146. end;
  2147. end;
  2148. function UTF8Decode(const s : UTF8String): UnicodeString;
  2149. var
  2150. i : SizeInt;
  2151. hs : UnicodeString;
  2152. begin
  2153. result:='';
  2154. if s='' then
  2155. exit;
  2156. SetLength(hs,length(s));
  2157. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  2158. if i>0 then
  2159. begin
  2160. SetLength(hs,i-1);
  2161. result:=hs;
  2162. end;
  2163. end;
  2164. function AnsiToUtf8(const s : RawByteString): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
  2165. begin
  2166. Result:=Utf8Encode(s);
  2167. end;
  2168. function Utf8ToAnsi(const s : UTF8String) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  2169. begin
  2170. Result:=Utf8Decode(s);
  2171. end;
  2172. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  2173. var
  2174. i, slen,
  2175. destindex : SizeInt;
  2176. len : longint;
  2177. begin
  2178. slen:=length(s);
  2179. setlength(result,slen+1);
  2180. i:=1;
  2181. destindex:=0;
  2182. while (i<=slen) do
  2183. begin
  2184. result[destindex]:=utf16toutf32(s,i,len);
  2185. inc(destindex);
  2186. inc(i,len);
  2187. end;
  2188. { destindex <= slen (surrogate pairs may have been merged) }
  2189. { destindex+1 for terminating #0 (dynamic arrays are }
  2190. { implicitely filled with zero) }
  2191. setlength(result,destindex+1);
  2192. end;
  2193. { concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. }
  2194. procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
  2195. var
  2196. p : PUnicodeChar;
  2197. begin
  2198. { if nc > $ffff, we need two places }
  2199. if (index+ord(nc > $ffff)>length(s)) then
  2200. if (length(s) < 10*256) then
  2201. setlength(s,length(s)+10)
  2202. else
  2203. setlength(s,length(s)+length(s) shr 8);
  2204. { we know that s is unique -> avoid uniquestring calls}
  2205. p:=@s[index];
  2206. if (nc<$ffff) then
  2207. begin
  2208. p^:=unicodechar(nc);
  2209. inc(index);
  2210. end
  2211. else if (dword(nc)<=$10ffff) then
  2212. begin
  2213. p^:=unicodechar((nc - $10000) shr 10 + $d800);
  2214. (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
  2215. inc(index,2);
  2216. end
  2217. else
  2218. { invalid code point }
  2219. begin
  2220. p^:='?';
  2221. inc(index);
  2222. end;
  2223. end;
  2224. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  2225. var
  2226. i : SizeInt;
  2227. resindex : SizeInt;
  2228. begin
  2229. { skip terminating #0 }
  2230. SetLength(result,length(s)-1);
  2231. resindex:=1;
  2232. for i:=0 to high(s)-1 do
  2233. ConcatUTF32ToUnicodeStr(s[i],result,resindex);
  2234. { adjust result length (may be too big due to growing }
  2235. { for surrogate pairs) }
  2236. setlength(result,resindex-1);
  2237. end;
  2238. function WideStringToUCS4String(const s : WideString) : UCS4String;
  2239. var
  2240. i, slen,
  2241. destindex : SizeInt;
  2242. len : longint;
  2243. begin
  2244. slen:=length(s);
  2245. setlength(result,slen+1);
  2246. i:=1;
  2247. destindex:=0;
  2248. while (i<=slen) do
  2249. begin
  2250. result[destindex]:=utf16toutf32(s,i,len);
  2251. inc(destindex);
  2252. inc(i,len);
  2253. end;
  2254. { destindex <= slen (surrogate pairs may have been merged) }
  2255. { destindex+1 for terminating #0 (dynamic arrays are }
  2256. { implicitely filled with zero) }
  2257. setlength(result,destindex+1);
  2258. end;
  2259. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  2260. procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
  2261. var
  2262. p : PWideChar;
  2263. begin
  2264. { if nc > $ffff, we need two places }
  2265. if (index+ord(nc > $ffff)>length(s)) then
  2266. if (length(s) < 10*256) then
  2267. setlength(s,length(s)+10)
  2268. else
  2269. setlength(s,length(s)+length(s) shr 8);
  2270. { we know that s is unique -> avoid uniquestring calls}
  2271. p:=@s[index];
  2272. if (nc<$ffff) then
  2273. begin
  2274. p^:=widechar(nc);
  2275. inc(index);
  2276. end
  2277. else if (dword(nc)<=$10ffff) then
  2278. begin
  2279. p^:=widechar((nc - $10000) shr 10 + $d800);
  2280. (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
  2281. inc(index,2);
  2282. end
  2283. else
  2284. { invalid code point }
  2285. begin
  2286. p^:='?';
  2287. inc(index);
  2288. end;
  2289. end;
  2290. function UCS4StringToWideString(const s : UCS4String) : WideString;
  2291. var
  2292. i : SizeInt;
  2293. resindex : SizeInt;
  2294. begin
  2295. { skip terminating #0 }
  2296. SetLength(result,length(s)-1);
  2297. resindex:=1;
  2298. for i:=0 to high(s)-1 do
  2299. ConcatUTF32ToWideStr(s[i],result,resindex);
  2300. { adjust result length (may be too big due to growing }
  2301. { for surrogate pairs) }
  2302. setlength(result,resindex-1);
  2303. end;
  2304. const
  2305. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  2306. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  2307. procedure unimplementedunicodestring;
  2308. begin
  2309. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  2310. If IsConsole then
  2311. begin
  2312. Writeln(StdErr,SNoUnicodestrings);
  2313. Writeln(StdErr,SRecompileWithUnicodestrings);
  2314. end;
  2315. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  2316. HandleErrorFrame(233,get_frame);
  2317. end;
  2318. function StringElementSize(const S: UnicodeString): Word; overload;
  2319. begin
  2320. if assigned(Pointer(S)) then
  2321. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  2322. else
  2323. Result:=SizeOf(UnicodeChar);
  2324. end;
  2325. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  2326. begin
  2327. if assigned(Pointer(S)) then
  2328. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  2329. else
  2330. Result:=0;
  2331. end;
  2332. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  2333. begin
  2334. {$ifdef FPC_HAS_CPSTRING}
  2335. if assigned(Pointer(S)) then
  2336. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  2337. else
  2338. {$endif FPC_HAS_CPSTRING}
  2339. Result:=DefaultUnicodeCodePage;
  2340. end;
  2341. {$warnings off}
  2342. function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
  2343. begin
  2344. unimplementedunicodestring;
  2345. end;
  2346. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  2347. begin
  2348. unimplementedunicodestring;
  2349. end;
  2350. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  2351. begin
  2352. unimplementedunicodestring;
  2353. end;
  2354. {$warnings on}
  2355. procedure initunicodestringmanager;
  2356. begin
  2357. {$ifndef HAS_WIDESTRINGMANAGER}
  2358. widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  2359. widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  2360. widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
  2361. widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  2362. {$endif HAS_WIDESTRINGMANAGER}
  2363. widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
  2364. widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  2365. {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2366. {$ifndef HAS_WIDESTRINGMANAGER}
  2367. widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
  2368. widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  2369. widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
  2370. widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  2371. {$endif HAS_WIDESTRINGMANAGER}
  2372. widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
  2373. widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
  2374. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  2375. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  2376. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2377. end;