ustrings.inc 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  13. {$define FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to (S = SizeOf(SizeInt)) :
  19. @-2*S : SizeInt for reference count;
  20. @-S : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  Type
    PUnicodeRec = ^TUnicodeRec;
    { Header record for a heap-allocated UnicodeString. NewUnicodeString
      allocates it at the start of the block and returns a pointer advanced
      by UnicodeFirstOff, so the header is reached from a string pointer S
      as PUnicodeRec(S-UnicodeFirstOff). }
    TUnicodeRec = Record
      { code page tag; NewUnicodeString stores DefaultUnicodeCodePage here }
      CodePage : TSystemCodePage;
      { size of one element; NewUnicodeString stores SizeOf(UnicodeChar) here }
      ElementSize : Word;
  {$ifdef CPU64}
      { align fields }
      Dummy : DWord;
  {$endif CPU64}
      { reference count; the ref-count helpers treat a negative value as a
        constant string and never modify or free it }
      Ref : SizeInt;
      { length in UnicodeChars (not bytes), terminating #0 not counted }
      Len : SizeInt;
    end;
  Const
    { size of the header, i.e. byte offset from the start of the heap block
      to the first character of the string }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  42. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
  43. {
  44. Default UnicodeChar <-> Char conversion is to only convert the
  45. lower 256 chars (code units below 256), all others are translated to '?'.
  46. These routines can be overridden for the Current Locale
  47. }
  48. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  49. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UnicodeChar -> single byte conversion.
    Resizes dest to len bytes, tags it with code page cp (without converting
    the contents) and then copies len code units from source: every unit
    below 256 is stored as the byte with the same value, everything else
    becomes '?'. Nothing is copied when dest is nil after the resize.
  }
  var
    remaining : SizeInt;
    outp      : PAnsiChar;
    code      : word;
  begin
    setlength(dest,len);
    if pointer(dest)=nil then
      exit;
    SetCodePage(dest,cp,false);
    { SetLength made dest unique, so writing through the raw pointer is safe }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  70. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  71. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  72. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback single byte -> UnicodeChar conversion.
    Resizes dest to len characters and widens each of the len source bytes
    to the UnicodeChar with the same ordinal value. The cp argument is not
    consulted by this default implementation.
  }
  var
    idx  : SizeInt;
    outp : PUnicodeChar;
  begin
    setlength(dest,len);
    { SetLength made dest unique, so writing through the raw pointer is safe }
    outp:=pointer(dest);
    idx:=0;
    while idx<len do
      begin
        outp[idx]:=unicodechar(byte(source[idx]));
        inc(idx);
      end;
  end;
  87. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  88. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  {
    Default character count for a zero-terminated string: simply the value
    of Length(Str), i.e. every byte is counted as one character.
  }
  begin
    result:=length(Str);
  end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  {
    Default code point size: 1 when the first byte of Str is not #0,
    otherwise 0. MaxLookAead is ignored by this default implementation.
  }
  begin
    { ord(false)=0, ord(true)=1 }
    result:=ord(Str^<>#0);
  end;
  100. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  {
    Maps a standard code page selector to a concrete code page:
    scpFileSystemSingleByte yields DefaultFileSystemCodePage, every other
    selector yields DefaultSystemCodePage.
    Never raises an exception, because text file handling relies on it.
  }
  begin
    if stdcp=scpFileSystemSingleByte then
      { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
        without a fully functional widestring manager that will probably cause
        more problems than it solves }
      Result:=DefaultFileSystemCodePage
    else
      Result:=DefaultSystemCodePage;
  end;
  Procedure GetUnicodeStringManager (Out Manager : TUnicodeStringManager);
  {
    Returns a copy of the currently installed string manager record
    (the global widestringmanager) in Manager.
  }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Out Old: TUnicodeStringManager);
  {
    Installs New as the global string manager (widestringmanager) and hands
    back the previously installed record in Old. The previous record is
    saved before it is overwritten.
  }
  begin
    Old:=widestringmanager;
    widestringmanager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  {
    Installs New as the global string manager (widestringmanager) without
    returning the previous one.
  }
  begin
    widestringmanager:=New;
  end;
  Procedure GetWideStringManager (out Manager : TUnicodeStringManager);
  {
    WideString-named twin of GetUnicodeStringManager: copies the global
    widestringmanager record into Manager.
  }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Out old: TUnicodeStringManager);
  {
    WideString-named twin of the two-argument SetUnicodeStringManager:
    stores the current widestringmanager in old, then installs New.
  }
  begin
    old:=widestringmanager;
    widestringmanager:=New;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  {
    WideString-named twin of the one-argument SetUnicodeStringManager:
    installs New as the global widestringmanager.
  }
  begin
    widestringmanager:=New;
  end;
  138. {****************************************************************************
  139. Internal functions, not in interface.
  140. ****************************************************************************}
  procedure UnicodeStringError;
  {
    Reports run-time error 204 through HandleErrorAddrFrameInd, passing the
    current code address and stack frame (get_pc_addr, get_frame).
    NOTE(review): 204 is presumably "Invalid pointer operation" in the FPC
    run-time error table - confirm.
  }
  begin
    HandleErrorAddrFrameInd(204,get_pc_addr,get_frame);
  end;
  145. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  146. {$define FPC_HAS_NEW_UNICODESTRING}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString on the heap with room for the header,
    Len characters and one terminating character.
    The header is set to length Len, reference count 1, code page
    DefaultUnicodeCodePage and element size SizeOf(UnicodeChar).
    Only the first character is set to #0; the rest of the payload is left
    as allocated.
    Returns a pointer to the first character (just past the header).
    When GetMem delivers nil, UnicodeStringError is called and nil is the
    function result.
  }
  Var
    Block : Pointer;
    Hdr   : PUnicodeRec;
  begin
    GetMem(Block,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
    if Block=nil then
      UnicodeStringError
    else
      begin
        Hdr:=PUnicodeRec(Block);
        Hdr^.Len:=Len;                                { Initial length }
        Hdr^.Ref:=1;                                  { Initial Refcount }
        Hdr^.CodePage:=DefaultUnicodeCodePage;
        Hdr^.ElementSize:=SizeOf(UnicodeChar);
        inc(Block,UnicodeFirstOff);                   { Points to string now }
        PUnicodeChar(Block)^:=#0;                     { Terminating #0 }
      end;
    NewUnicodeString:=Block;
  end;
  169. {$endif FPC_HAS_NEW_UNICODESTRING}
  170. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  171. {$define FPC_HAS_UNICODESTR_DECR_REF}
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Releases one reference to the unicodestring S points at and sets S to nil.
    A nil S is left alone. Strings whose reference count is negative
    (constants) are never decremented or freed. For all others the count is
    decremented and the block, header included, is freed once the count
    reaches zero.
  }
  Var
    Hdr: PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        { step back from the character data to the header }
        Hdr:=PUnicodeRec(S-UnicodeFirstOff);
        S:=nil;
        { constants carry a negative count and must stay untouched }
        if Hdr^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(Hdr^.Ref) then
            FreeMem(Hdr);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  194. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  195. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  196. {$define FPC_HAS_UNICODESTR_INCR_REF}
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S points at.
    Nil strings and strings with a negative reference count (constants)
    are left untouched.
  }
  Var
    Hdr : PUnicodeRec;
  Begin
    if S=Nil then
      exit;
    Hdr:=PUnicodeRec(S-UnicodeFirstOff);
    { constant strings keep their negative count }
    if Hdr^.Ref>=0 then
      inclocked(Hdr^.Ref);
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  208. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  209. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  210. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString.
    At most high(res) UnicodeChars of S2 are handed to the string manager's
    Unicode2AnsiMoveProc (target code page DefaultSystemCodePage); the
    converted text is then assigned to res. An empty S2 yields ''.
  }
  Var
    Count     : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    { never convert more characters than res can hold }
    if Count>high(res) then
      Count:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),Converted,DefaultSystemCodePage,Count);
    res:=Converted;
  end;
  229. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  230. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  231. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString through the string manager's
    Ansi2UnicodeMoveProc, interpreting the bytes in DefaultSystemCodePage.
    An empty S2 yields ''.
  }
  begin
    result:='';
    if Length(S2)>0 then
      widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Length(S2));
  end;
  244. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  245. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  246. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString through the string manager's
    Unicode2AnsiMoveProc. The target code page is cp after it has been run
    through TranslatePlaceholderCP; without FPC_HAS_CPSTRING there is no cp
    parameter and DefaultSystemCodePage is used. An empty S2 yields ''.
  }
  Var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,TranslatePlaceholderCP(cp),CharCount);
  end;
  268. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  269. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  270. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString through the string manager's
    Ansi2UnicodeMoveProc. The source code page is StringCodePage(S2) after
    it has been run through TranslatePlaceholderCP. An empty S2 yields ''.
  }
  Var
    ByteCount : SizeInt;
  begin
    result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),TranslatePlaceholderCP(StringCodePage(S2)),result,ByteCount);
  end;
  287. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  288. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  289. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  {
    Copies a UnicodeString into a WideString of the same length; the
    characters are moved verbatim, no conversion takes place.
  }
  Var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  295. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  296. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  297. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  {
    Copies a WideString into a UnicodeString of the same length; the
    characters are moved verbatim, no conversion takes place.
  }
  Var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  303. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  304. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  305. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from a zero-terminated wide character buffer.
    The length is the index of the first zero word found by IndexWord;
    the characters are moved verbatim. A nil p yields ''.
  }
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, -1, 0);
        Setlength(result,CharCount);
        if CharCount>0 then
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
      end;
  end;
  318. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  319. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  320. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero-terminated wide character buffer to an AnsiString
    through the string manager's Wide2AnsiMoveProc. The target code page is
    cp after TranslatePlaceholderCP; without FPC_HAS_CPSTRING there is no cp
    parameter and DefaultSystemCodePage is used.
    A nil p or an empty buffer yields ''.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p=nil then
      exit;
    { length = position of the terminating zero word }
    CharCount:=IndexWord(p^, -1, 0);
    if CharCount<=0 then
      exit;
    widestringmanager.Wide2AnsiMoveProc(P,result,TranslatePlaceholderCP(cp),CharCount);
  end;
  341. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  342. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  343. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts a zero-terminated wide character buffer to a ShortString.
    The buffer is converted with the string manager's Wide2AnsiMoveProc
    (target code page DefaultSystemCodePage) into a temporary AnsiString,
    which is then assigned to res. A nil p or an empty buffer yields ''.
  }
  var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        { length = position of the terminating zero word }
        CharCount:=IndexWord(p^, high(PtrInt), 0);
        if CharCount>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,Converted,DefaultSystemCodePage,CharCount);
            res:=Converted;
          end;
      end;
  end;
  359. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  { Define the very symbol the guard tests, as every other guard in this file
    does. Previously only FPC_UNICODESTR_ASSIGN was defined, so the tested
    symbol never became set by this block. }
  {$define FPC_HAS_UNICODESTR_ASSIGN}
  { legacy spelling, kept in case anything still tests the old symbol }
  {$define FPC_UNICODESTR_ASSIGN}

  { checked against the ansistring routine, 2001-05-27 (FK) }

  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2), taking in account reference counts.
    S1 : variable holding the destination string pointer; its old value is
         released through fpc_unicodestr_decr_ref.
    S2 : source string pointer, may be nil.
    The reference count of S2 is only incremented when it is positive.
  }
  begin
    { Take the new reference first, so that S1:=S1 cannot free the string
      before it is re-referenced }
    If S2<>nil then
      If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
        inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    s1:=s2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  {$endif FPC_HAS_UNICODESTR_ASSIGN}
  378. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  379. {$define FPC_HAS_UNICODESTR_CONCAT}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1+S2 in DestS.
    If one operand is empty the other one is simply assigned. Otherwise
    DestS is resized once to the combined length and filled in place; the
    cases where DestS is the same string as S1 and/or S2 are handled
    separately so that the operand is not lost by the resize.
  }
  Var
    Len1,Len2 : SizeInt;
    BothSame  : boolean;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { remember whether S2 is that same string too, before the resize }
        BothSame:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        if BothSame then
          { DestS:=DestS+DestS : duplicate the first half behind itself }
          Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2)*sizeof(UnicodeChar))
        else
          { append S2 including its terminating #0 }
          Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        SetLength(DestS,Len1+Len2);
        { shift the old contents (the S2 part) up, then put S1 in front }
        Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
      end
    else
      begin
        { DestS is unrelated to both operands: start from scratch }
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  422. {$endif FPC_HAS_UNICODESTR_CONCAT}
  423. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  424. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all elements of sarr in DestS.
    DestS : receives the result; it may itself be one of the elements.
    sarr  : the operands, in order.
    When DestS is the first element only, the remaining elements are
    appended in place. When DestS also occurs later in the array, an extra
    reference to the original string is held while DestS is rebuilt from
    scratch.
    An empty array yields ''. A one-element array yields that element.
  }
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    lowstart    : longint;
    destcopy    : pointer;
    OldDestLen  : SizeInt;
  begin
    { Nothing to concatenate. The test used to be high(sarr)=0, which
      returned '' for a one-element array and let an empty array
      (high = -1) fall through to the sarr[lowstart] access below. }
    if high(sarr)<0 then
      begin
        DestS:='';
        exit;
      end;
    destcopy:=nil;
    lowstart:=low(sarr);
    { DestS is the first operand: keep it and append the rest }
    if Pointer(DestS)=Pointer(sarr[lowstart]) then
      inc(lowstart);
    { Check for another reuse, then we can't use
      the append optimization }
    for i:=lowstart to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[i]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. Therefore
              this optimization is disabled for WINLIKEUNICODESTRING }
            destcopy:=pointer(dests);
            fpc_UnicodeStr_Incr_Ref(destcopy);
            lowstart:=low(sarr);
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if lowstart=low(sarr) then
      DestS:='';
    OldDestLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestS,NewLen);
    { Concat all strings, except the string we already
      copied in DestS }
    pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
    for i:=lowstart to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(unicodestring(p));
            { copy the terminating #0 as well; the next element overwrites it }
            Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
            inc(pc,size*sizeof(UnicodeChar));
          end;
      end;
    { drop the extra reference taken above (no-op when destcopy is nil) }
    fpc_UnicodeStr_Decr_Ref(destcopy);
  end;
  486. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  487. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  488. {$define FPC_HAS_CHAR_TO_UCHAR}
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts a Char to a UnicodeChar: the single byte is converted with the
    string manager's Ansi2UnicodeMoveProc (source code page
    DefaultSystemCodePage) and the first character of the outcome is returned.
  }
  var
    Converted: unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,Converted,1);
    result:=Converted[1];
  end;
  496. {$endif FPC_HAS_CHAR_TO_UCHAR}
  497. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  498. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Converts a Char to a UnicodeString by handing the single byte to the
    string manager's Ansi2UnicodeMoveProc (source code page
    DefaultSystemCodePage).
  }
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,fpc_Char_To_UnicodeStr,1);
  end;
  506. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  507. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  508. {$define FPC_HAS_UCHAR_TO_CHAR}
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char through the string manager's
    Unicode2AnsiMoveProc (target code page DefaultSystemCodePage).
    When the conversion does not produce exactly one byte, '?' is returned.
  }
  var
    Converted: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    if length(Converted)<>1 then
      result:='?'
    else
      result:=Converted[1];
  end;
  522. {$endif FPC_HAS_UCHAR_TO_CHAR}
  523. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  524. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  {
    Converts a WideChar to a ShortString: the character is converted with
    the string manager's Wide2AnsiMoveProc (target code page
    DefaultSystemCodePage) into a temporary AnsiString, which becomes the
    result.
  }
  var
    Converted: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,Converted,DefaultSystemCodePage,1);
    fpc_UChar_To_ShortStr:=Converted;
  end;
  535. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  536. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  537. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Returns a UnicodeString of length 1 whose only character is c.
  }
  begin
    SetLength(result,1);
    result[1]:=c;
  end;
  546. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  547. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  548. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a single UnicodeChar to an AnsiString through the string
    manager's Unicode2AnsiMoveProc. The target code page is cp after
    TranslatePlaceholderCP; without FPC_HAS_CPSTRING there is no cp
    parameter and DefaultSystemCodePage is used.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, TranslatePlaceholderCP(cp), 1);
  end;
  564. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  565. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  566. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a zero-terminated byte string to a UnicodeString through the
    string manager's Ansi2UnicodeMoveProc (source code page
    DefaultSystemCodePage). The length is the index of the first #0 found
    by IndexChar. A nil p or a string starting with #0 yields ''.
  }
  begin
    if assigned(p) and (p[0]<>#0) then
      widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,IndexChar(p^,-1,#0))
    else
      fpc_PChar_To_UnicodeStr:='';
  end;
  579. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  580. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  581. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a char array to a UnicodeString through the string manager's
    Ansi2UnicodeMoveProc (source code page DefaultSystemCodePage).
    With zerobased set, conversion stops at the first #0 in the array (an
    array starting with #0 yields ''); if there is none, or if zerobased is
    false, the whole array is converted.
  }
  var
    Count,ZeroPos : SizeInt;
  begin
    { default: the complete array }
    Count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 Then
          begin
            fpc_CharArray_To_UnicodeStr:='';
            exit;
          end;
        ZeroPos:=IndexChar(arr,high(arr)+1,#0);
        if ZeroPos<>-1 then
          Count:=ZeroPos;
      end;
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,Count);
  end;
  601. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  602. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  603. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a widechar array into a UnicodeString; the characters are moved
    verbatim. With zerobased set, copying stops at the first zero word in
    the array; if there is none, or if zerobased is false, the whole array
    is copied.
  }
  var
    Count,ZeroPos : SizeInt;
  begin
    { default: the complete array }
    Count:=high(arr)+1;
    if zerobased then
      begin
        ZeroPos:=IndexWord(arr,high(arr)+1,0);
        if ZeroPos<>-1 then
          Count:=ZeroPos;
      end;
    SetLength(fpc_WideCharArray_To_UnicodeStr,Count);
    Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,Count*sizeof(WideChar));
  end;
  619. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  620. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  621. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  622. { due to their names, the following procedures should be in wstrings.inc,
  623. however, the compiler generates code using this functions on all platforms }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a widechar array to a ShortString.
    At most high(res) characters of arr are considered. With zerobased set,
    conversion additionally stops at the first zero word within that range.
    The selected characters are converted with the string manager's
    Wide2AnsiMoveProc (target code page DefaultSystemCodePage) into a
    temporary AnsiString, which is then assigned to res.
  }
  var
    Limit     : longint;
    ZeroPos   : ptrint;
    Count     : byte;
    Converted : ansistring;
  begin
    { clamp the element count to 0..high(res) }
    Limit:=high(arr)+1;
    if Limit>=high(res)+1 then
      Limit:=high(res)
    else if Limit<0 then
      Limit:=0;
    Count:=Limit;
    if zerobased then
      begin
        ZeroPos:=IndexWord(arr[0],Limit,0);
        if ZeroPos>=0 then
          Count:=ZeroPos;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),Converted,DefaultSystemCodePage,Count);
    res:=Converted;
  end;
  649. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  { Converts a widechar array to an AnsiString in code page cp (the default
    system code page when code-page strings are not available). With zerobased
    set the conversion stops at the first #0 element. An empty input yields ''. }
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    CharCount:=high(arr)+1;
    if zerobased then
      begin
        CharCount:=IndexWord(arr,high(arr)+1,0);
        if CharCount=-1 then
          CharCount:=high(arr)+1;
      end;
    if CharCount<=0 then
      fpc_WideCharArray_To_AnsiStr:=''
    else
      begin
        cp:=TranslatePlaceholderCP(cp);
        widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),RawByteString(fpc_WideCharArray_To_AnsiStr),cp,CharCount);
      end;
  end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  { Builds a WideString from an open array of widechar; with zerobased set the
    copy ends at the first #0 element, otherwise the whole array is copied. }
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  var
    CharCount : SizeInt;
  begin
    CharCount:=high(arr)+1;
    if zerobased then
      begin
        CharCount:=IndexWord(arr,high(arr)+1,0);
        if CharCount=-1 then
          CharCount:=high(arr)+1;
      end;
    SetLength(fpc_WideCharArray_To_WideStr,CharCount);
    Move(arr[0],Pointer(fpc_WideCharArray_To_WideStr)^,CharCount*sizeof(WideChar));
  end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  { Converts src to the default system code page and stores it in res.
    Output that does not fit is cut off; unused trailing elements are zeroed. }
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  var
    CopyLen: SizeInt;
    Converted: ansistring;
  begin
    { make sure we don't dereference src if it can be nil (JM) }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),Converted,DefaultSystemCodePage,length(src));
    CopyLen:=length(Converted);
    if CopyLen>length(res) then
      CopyLen:=length(res);
  {$push}
  {$r-}
    { range checks off: element addresses are formed even when nothing is copied }
    move(Converted[1],res[0],CopyLen);
    fillchar(res[CopyLen],length(res)-CopyLen,0);
  {$pop}
  end;
  {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  { Converts src (using its own code page, placeholders translated) to wide
    characters and stores them in res; excess output is dropped and the
    remainder of res is zero-filled. }
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  var
    CopyLen: SizeInt;
    Converted: widestring;
  begin
    { make sure we don't dereference src if it can be nil (JM) }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),Converted,length(src));
    CopyLen:=length(Converted);
    if CopyLen>length(res) then
      CopyLen:=length(res);
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyLen*sizeof(widechar));
    fillchar(res[CopyLen],(length(res)-CopyLen)*SizeOf(WideChar),0);
  {$pop}
  end;
  {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  { Converts a shortstring (default system code page) to wide characters and
    stores them in res; excess output is dropped, the rest of res is zeroed. }
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  var
    CopyLen: longint;
    Converted : widestring;
  begin
    { make sure we don't access char 1 if length is 0 (JM) }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,Converted,length(src));
    CopyLen:=length(Converted);
    if CopyLen>length(res) then
      CopyLen:=length(res);
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyLen*sizeof(widechar));
    fillchar(res[CopyLen],(length(res)-CopyLen)*SizeOf(WideChar),0);
  {$pop}
  end;
  {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  { Copies as many characters of src as fit into res and zero-fills the
    remaining elements of res. }
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  var
    CopyLen: SizeInt;
  begin
    CopyLen:=length(src);
    if length(res)<CopyLen then
      CopyLen:=length(res);
  {$push}
  {$r-}
    { make sure we don't try to access element 1 of the widestring if it's nil }
    if CopyLen>0 then
      move(src[1],res[0],CopyLen*SizeOf(WideChar));
    fillchar(res[CopyLen],(length(res)-CopyLen)*SizeOf(WideChar),0);
  {$pop}
  end;
  {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings by 16-bit code unit:
     <0 if S1<S2
      0 if S1=S2
     >0 if S1>S2
    When the common prefix is equal, the length difference decides.
  }
  Var
    Common,Diff : SizeInt;
  begin
    { identical pointers (including both nil) are trivially equal }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Common:=Length(S1);
    if Length(S2)<Common then
      Common:=Length(S2);
    Diff:=CompareWord(S1[1],S2[1],Common);
    if Diff=0 then
      Diff:=Length(S1)-Length(S2);
    fpc_UnicodeStr_Compare:=Diff;
  end;
  {$endif FPC_HAS_UNICODESTR_COMPARE}
  {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
      0   if S1=S2
      <>0 if S1<>S2 (-1 when the lengths differ)
  }
  begin
    if pointer(S1)=pointer(S2) then
      fpc_UnicodeStr_Compare_Equal:=0
    else if Length(S1)<>Length(S2) then
      fpc_UnicodeStr_Compare_Equal:=-1
    else
      fpc_UnicodeStr_Compare_Equal:=CompareWord(S1[1],S2[1],Length(S1));
  end;
  {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  {$define FPC_HAS_UNICODESTR_RANGECHECK}
  { Raises run-time error 201 unless p is a non-nil string and
    1 <= index <= length. }
  Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  begin
    if not ((p<>nil) and (index>=1) and (index<=PUnicodeRec(p-UnicodeFirstOff)^.len)) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;

  { Zero-based variant: raises run-time error 201 unless p is non-nil and
    0 <= index < length. }
  Procedure fpc_UnicodeStr_ZeroBased_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_ZEROBASED_RANGECHECK']; compilerproc;
  begin
    if not ((p<>nil) and (index>=0) and (index<PUnicodeRec(p-UnicodeFirstOff)^.len)) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l.
    Afterwards S is uniquely referenced and has room for l characters plus
    the terminating null; l<=0 releases the string.
  }
  Var
    Block : Pointer;
    CopyCount : SizeInt;
    NewLen,CurSize,WantSize : SizeUInt;
  begin
    NewLen:=l;
    if l<=0 then
      begin
        { length=0, deallocate the string }
        fpc_unicodestr_decr_ref(Pointer(S));
        exit;
      end;
    if Pointer(S)=nil then
      { Need a complete new string... }
      Pointer(S):=NewUnicodeString(NewLen)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: resize in place, but only when the block is too small
          or (for blocks above 32 bytes) at least twice as big as needed }
        Block:=Pointer(S)-UnicodeFirstOff;
        CurSize:=MemSize(Block);
        WantSize:=SizeUInt(L*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
        if (WantSize>CurSize) or ((CurSize>32) and (WantSize<=(CurSize div 2))) then
          begin
            reallocmem(Block,WantSize);
            Pointer(S):=Block+UnicodeFirstOff;
          end;
      end
    else
      begin
        { shared: build a private copy and drop our reference to the old one }
        Block:=NewUnicodeString(NewLen);
        if Length(S)>0 then
          begin
            { also move terminating null when the whole string fits }
            CopyCount:=succ(length(s));
            if l<CopyCount then
              CopyCount:=l;
            Move(Pointer(S)^,Block^,CopyCount*Sizeof(UnicodeChar));
          end;
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Block;
      end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=NewLen;
  end;
  {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  896. {*****************************************************************************
  897. Public functions, In interface.
  898. *****************************************************************************}
  { Returns a UnicodeString copy of the null-terminated string S.
    A nil S yields an empty string. }
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  begin
    if S=nil then
      result:=''
    else
      { Length() applied to the pointer measures up to the terminator; the
        previous Length(UnicodeString(S)) built a throw-away copy of the
        whole string just to obtain the same number. }
      result:=UnicodeCharLenToString(S,Length(S));
  end;
  {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  {$define FPC_HAS_STRING_TO_UNICODECHAR}
  { Same operation as StringToWideChar, to which this simply forwards;
    returns whatever that call returns. }
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  begin
    StringToUnicodeChar:=StringToWideChar(Src,Dest,DestSize);
  end;
  {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  { Returns a UnicodeString copy of the null-terminated wide string S.
    A nil S yields an empty string. }
  function WideCharToString(S : PWideChar) : UnicodeString;
  begin
    if S=nil then
      result:=''
    else
      { measure the source directly instead of materialising a temporary
        WideString only to take its length }
      result:=WideCharLenToString(S,Length(S));
  end;
  {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  { Converts Src to wide characters and stores the result, always
    null-terminated, in the buffer Dest of DestSize characters.
    At most DestSize-1 characters are copied. Returns Dest.
    When Dest is nil or DestSize<=0 there is no room even for the terminator,
    so nothing is written. }
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  var
    Converted: widestring;
    CopyLen: SizeInt;
  begin
    result:=Dest;
    { Previously DestSize<=0 produced a negative length and the terminator
      was stored in front of the buffer. }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),Converted,Length(Src));
    CopyLen:=Length(Converted);
    if DestSize<=CopyLen then
      CopyLen:=DestSize-1;
    { do not index an empty string }
    if CopyLen>0 then
      move(Converted[1],Dest^,CopyLen*SizeOf(WideChar));
    Dest[CopyLen]:=#0;
  end;
  {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  { Returns a UnicodeString holding the first Len characters found at S. }
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  begin
    SetLength(UnicodeCharLenToString,Len);
    { two bytes per UTF-16 code unit }
    Move(S^,Pointer(UnicodeCharLenToString)^,2*Len);
  end;
  {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  { Stores the first Len characters at Src in Dest. }
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  begin
    Dest:=UnicodeCharLenToString(Src,Len);
  end;

  { Stores the first Len characters at Src in Dest, converted to AnsiString. }
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeCharLenToString(Src,Len);
    Dest:=AnsiString(Wide);
  end;
  { Stores the null-terminated string S in Dest, converted to AnsiString. }
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeCharToString(S);
    Dest:=AnsiString(Wide);
  end;
  {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  { Returns a UnicodeString holding the first Len wide characters found at S. }
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  begin
    SetLength(WideCharLenToString,Len);
    { two bytes per wide character }
    Move(S^,Pointer(WideCharLenToString)^,2*Len);
  end;
  {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  { Stores the first Len wide characters at Src in Dest. }
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  begin
    Dest:=WideCharLenToString(Src,Len);
  end;

  { Stores the first Len wide characters at Src in Dest as an AnsiString. }
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  var
    Wide : UnicodeString;
  begin
    Wide:=WideCharLenToString(Src,Len);
    Dest:=AnsiString(Wide);
  end;
  { Stores the null-terminated wide string S in Dest. }
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  begin
    Dest:=WideCharToString(S);
  end;

  { Stores the null-terminated wide string S in Dest as an AnsiString. }
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  var
    Wide : UnicodeString;
  begin
    Wide:=WideCharToString(S);
    Dest:=AnsiString(Wide);
  end;
  { Function-typed view of the routine exported as FPC_UNICODESTR_UNIQUE,
    so it can be called on a UnicodeString variable. }
  Function fpc_unicodestr_Unique_func(Var S : UnicodeString): Pointer; external name 'FPC_UNICODESTR_UNIQUE';

  { Public entry point: calls FPC_UNICODESTR_UNIQUE on S and discards the
    returned pointer. }
  Procedure UniqueString (Var S : UnicodeString);{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    fpc_unicodestr_Unique_func(S);
  end;
  {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Ensures the reference count of S is 1. A string whose count is not 1 is
    replaced by a freshly allocated copy. Returns the (possibly new) data
    pointer; nil stays nil.
  }
  Var
    Fresh : Pointer;
    CharCount : SizeInt;
  begin
    pointer(result):=pointer(s);
    if (Pointer(S)<>nil) and (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1) then
      begin
        CharCount:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
        Fresh:=NewUnicodeString(CharCount);
        { +1: the terminating null travels with the data }
        Move(PUnicodeChar(S)^,Fresh^,(CharCount+1)*sizeof(UnicodeChar));
        PUnicodeRec(Fresh-UnicodeFirstOff)^.len:=CharCount;
        fpc_unicodestr_decr_ref(Pointer(S));  { Thread safe }
        pointer(S):=Fresh;
        pointer(result):=Fresh;
      end;
  end;
  {$endif FPC_HAS_UNICODESTR_UNIQUE}
  {$ifndef FPC_HAS_UNICODESTR_COPY}
  {$define FPC_HAS_UNICODESTR_COPY}
  { Returns up to Size characters of S starting at 1-based position Index.
    An Index below 1 is treated as 1; a Size reaching past the end is
    trimmed; an empty range yields an empty string. }
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  var
    Fresh : Pointer;
    SrcLen : SizeInt;
  begin
    Fresh:=Nil;
    SrcLen:=Length(S);
    { switch to a zero-based start offset }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Check Size. Accounts for Zero-length S, the double check is needed because
      Size can be maxint and will get <0 when adding index }
    if (Size>SrcLen) or (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        Fresh:=NewUnicodeString(Size);
        Move(PUnicodeChar(S)[Index],Fresh^,Size*sizeof(UnicodeChar));
        PUnicodeRec(Fresh-UnicodeFirstOff)^.Len:=Size;
        PUnicodeChar(Fresh+Size*sizeof(UnicodeChar))^:=#0;
      end;
    { release whatever the result variable held before installing the copy }
    fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
    Pointer(fpc_unicodestr_Copy):=Fresh;
  end;
  {$endif FPC_HAS_UNICODESTR_COPY}
  {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  { Returns the 1-based position of the first occurrence of Substr in Source
    at or after Offset, or 0 when there is none (also for an empty Substr or
    an Offset outside 1..Length(Source)). }
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  var
    Start,LastStart : SizeInt;
    Cur : punicodechar;
  begin
    Pos:=0;
    if (Length(SubStr)=0) or (Offset<=0) or (Offset>Length(Source)) then
      exit;
    { last position at which Substr still fits completely }
    LastStart:=Length(Source)-Length(SubStr)+1;
    Cur:=@Source[Offset];
    for Start:=Offset to LastStart do
      begin
        { cheap first-character test before the full comparison }
        if (SubStr[1]=Cur^) and
           (CompareWord(Substr[1],Cur^,Length(SubStr))=0) then
          begin
            Pos:=Start;
            exit;
          end;
        inc(Cur);
      end;
  end;
  {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  { Faster version for a unicodechar alone: returns the 1-based position of
    the first c in s at or after Offset, or 0 when absent or Offset is out of
    range. }
  Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  var
    Idx: SizeInt;
    Cur : punicodechar;
  begin
    pos:=0;
    if (Offset<=0) or (Offset>length(s)) then
      exit;
    Cur:=@s[OffSet];
    Idx:=OffSet;
    while Idx<=length(s) do
      begin
        if Cur^=c then
          begin
            pos:=Idx;
            exit;
          end;
        inc(Cur);
        inc(Idx);
      end;
  end;
  {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
    block, which is significant bloat without any sensible speed improvement. }

  { Mixed-type overloads: the non-Unicode operand is converted to
    UnicodeString and the search is forwarded to the UnicodeString version. }
  Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;

  Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;

  Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  begin
    Pos:=Pos(c,UnicodeString(s),Offset);
  end;
  {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  {$define FPC_HAS_POS_CHAR_UNICODESTR}
  { Faster version for a char alone. Must be implemented because   }
  { pos(c: char; const s: shortstring) also exists, so otherwise   }
  { using pos(char,pchar) will always call the shortstring version }
  { (exact match for first argument), also with $h+ (JM)           }
  Function Pos (c : Char; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  var
    Idx: SizeInt;
    Needle : unicodechar;
    Cur : punicodechar;
  begin
    pos:=0;
    if (Offset<=0) or (Offset>Length(S)) then
      exit;
    { widen the char once, outside the loop }
    Needle:=c;
    Cur:=@s[OffSet];
    Idx:=OffSet;
    while Idx<=length(s) do
      begin
        if Cur^=Needle then
          begin
            pos:=Idx;
            exit;
          end;
        inc(Cur);
        inc(Idx);
      end;
  end;
  {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  {$ifndef FPC_HAS_DELETE_UNICODESTR}
  {$define FPC_HAS_DELETE_UNICODESTR}
  { Removes Size characters from S starting at 1-based position Index.
    Nothing happens when Index is outside 1..Length(S) or Size<=0; a Size
    reaching past the end removes everything from Index onwards. }
  Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  Var
    Total : SizeInt;
  begin
    Total:=Length(S);
    if (Index<=0) or (Index>Total) or (Size<=0) then
      exit;
    UniqueString (S);
    { (Size+Index) will overflow if Size=MaxInt. }
    if Size>Total-Index then
      { cut the whole tail: nothing has to be shifted }
      Size:=Total-Index+1
    else
      begin
        { close the gap; the +1 carries the terminating null along }
        Dec(Index);
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(Total-Index-Size+1)*sizeof(UnicodeChar));
      end;
    Setlength(s,Total-Size);
  end;
  {$endif FPC_HAS_DELETE_UNICODESTR}
  {$ifndef FPC_HAS_INSERT_UNICODESTR}
  {$define FPC_HAS_INSERT_UNICODESTR}
  { Inserts Source into S in front of 1-based position Index. Index is
    clamped to 1..Length(S)+1; an empty Source leaves S untouched. }
  Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  var
    Merged : UnicodeString;
    OldLen,AddLen : SizeInt;
  begin
    AddLen:=Length(Source);
    if AddLen=0 then
      exit;
    OldLen:=Length(S);
    if index<=0 then
      index:=1;
    if index>OldLen then
      index:=OldLen+1;
    { from here on Index is the zero-based insertion offset }
    Dec(Index);
    SetLength(Merged,AddLen+OldLen);
    { head of S, then Source, then the tail of S }
    if Index>0 then
      move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],AddLen*sizeof(UnicodeChar));
    if (OldLen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[AddLen+Index],(OldLen-Index)*sizeof(UnicodeChar));
    S:=Merged;
  end;
  {$endif FPC_HAS_INSERT_UNICODESTR}
  {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  {$define FPC_HAS_UPCASE_UNICODECHAR}
  { Upper-cases one UTF-16 code unit: 'a'..'z' are mapped arithmetically,
    code units >= 128 go through the widestring manager, everything else is
    returned unchanged. }
  Function UpCase(c:UnicodeChar):UnicodeChar;
  begin
    if word(c)>=128 then
      Result:=widestringmanager.UpperUnicodeStringProc(UnicodeString(c))[1]
    else if (word(c)>=Ord('a')) and (word(c)<=Ord('z')) then
      Result:=UnicodeChar(word(c)-32)
    else
      Result:=c;
  end;
  {$endif FPC_HAS_UPCASE_UNICODECHAR}
  {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  {$define FPC_HAS_UPCASE_UNICODESTR}
  { Returns s as converted by the widestring manager's upper-case routine. }
  function UpCase(const s : UnicodeString) : UnicodeString;
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  {$endif FPC_HAS_UPCASE_UNICODESTR}
  {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  { Lower-cases one UTF-16 code unit: 'A'..'Z' are mapped arithmetically,
    code units >= 128 go through the widestring manager, everything else is
    returned unchanged. }
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  begin
    if word(c)>=128 then
      Result:=widestringmanager.LowerUnicodeStringProc(UnicodeString(c))[1]
    else if (word(c)>=Ord('A')) and (word(c)<=Ord('Z')) then
      Result:=UnicodeChar(word(c)+32)
    else
      Result:=c;
  end;
  {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  {$define FPC_HAS_LOWERCASE_UNICODESTR}
  { Returns s as converted by the widestring manager's lower-case routine. }
  function LowerCase(const s : UnicodeString) : UnicodeString;
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(s);
  end;
  {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  { Sets S to length Len and, when Buf is non-nil and Len is positive, fills
    it with the first Len characters found at Buf. }
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>Nil) then
      Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  { Sets S from Len bytes at Buf, converted from the default system code
    page. Without a buffer (or with Len<=0) only the length of S is set. }
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  begin
    if (Buf=Nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  {$ifndef FPUNONE}
  { Val() for floating point on a UnicodeString: the text is narrowed to a
    ShortString first. Input longer than 255 characters is rejected with
    code=256 and a result of 0. }
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_Real_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_Real_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  {$endif}
  {$ifndef FPC_STR_ENUM_INTERN}
  { Val() helper for enumerations on a UnicodeString: the text is narrowed to
    a ShortString and handed to Val. Input longer than 255 characters is
    rejected with code=256 and a result of 0.
    NOTE(review): str2ordindex is not used by this implementation - confirm
    that this is intended. }
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  var
    ss: ShortString;
  begin
    { Give the result a defined value on the rejection path as well, like
      the other Val helpers in this file do; it used to be left
      uninitialised there. }
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=ShortString(s);
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  {$endif FPC_STR_ENUM_INTERN}
  { Val() for Currency on a UnicodeString via a ShortString copy. Input longer
    than 255 characters is rejected with code=256 and a result of 0. }
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_Currency_UnicodeStr,code);
      end
    else
      begin
        fpc_Val_Currency_UnicodeStr:=0;
        code:=256;
      end;
  end;
  { Val() for ValUInt on a UnicodeString via a ShortString copy. Input longer
    than 255 characters is rejected with code=256 and a result of 0. }
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_UInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_UInt_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  { Val() for signed integers of DestSize bytes on a UnicodeString; the work
    is done by int_Val_SInt_ShortStr on a ShortString copy. Input longer than
    255 characters is rejected with code=256 and a result of 0. }
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_SInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        fpc_Val_SInt_UnicodeStr:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      code:=256;
  end;
  {$ifndef CPU64}
  { 64-bit Val() helpers, compiled only when CPU64 is not defined. Both
    narrow the input to a ShortString; input longer than 255 characters is
    rejected with code=256 and a result of 0. }
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_qword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_qword_UnicodeStr,Code);
      end
    else
      code:=256;
  end;

  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_int64_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_int64_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  {$endif CPU64}
  {$if defined(CPU16) or defined(CPU8)}
  { Val() helpers for the integer types handled separately on 8/16-bit CPUs.
    All four narrow the input to a ShortString; input longer than 255
    characters is rejected with code=256 and a result of 0. }
  Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_longword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_longword_UnicodeStr,Code);
      end
    else
      code:=256;
  end;

  Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_longint_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_longint_UnicodeStr,Code);
      end
    else
      code:=256;
  end;

  Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_word_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_word_UnicodeStr,Code);
      end
    else
      code:=256;
  end;

  Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  Var
    Narrow: ShortString;
  begin
    fpc_Val_smallint_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,fpc_Val_smallint_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  {$endif CPU16 or CPU8}
  {$ifndef FPUNONE}
  { Str() for floating point into a UnicodeString: str_real formats into a
    ShortString which is then widened. }
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  var
    Text: shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Text);
    s:=UnicodeString(Text);
  end;
  {$endif}
  {$ifndef FPC_STR_ENUM_INTERN}
  { Str() for enumerations into a UnicodeString: fpc_shortstr_enum produces
    the ShortString form, which is then widened. }
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  var
    Text: ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Text);
    s:=UnicodeString(Text);
  end;
  {$endif FPC_STR_ENUM_INTERN}
  { Str() for booleans into a UnicodeString: fpc_shortstr_bool produces the
    ShortString form, which is then widened. }
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  var
    Text: ShortString;
  begin
    fpc_shortstr_bool(b,len,Text);
    s:=UnicodeString(Text);
  end;
  { Str() for Currency into a UnicodeString with field width len and fr
    decimals; formatted as ShortString first, then widened. }
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  var
    Text: shortstring;
  begin
    str(c:len:fr,Text);
    s:=UnicodeString(Text);
  end;
  { Str() for ValSInt into a UnicodeString with field width Len; formatted as
    ShortString first, then widened. }
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;
  { Str() for ValUInt into a UnicodeString with field width Len; formatted as
    ShortString first, then widened. }
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;
  {$ifndef CPU64}
  { 64-bit Str() helpers, compiled only when CPU64 is not defined: format
    into a ShortString with field width Len, then widen. }
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;

  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;
  {$endif CPU64}
  {$if defined(CPU16) or defined(CPU8)}
  { Str() helpers for the integer types handled separately on 8/16-bit CPUs:
    format into a ShortString with field width Len, then widen. }
  Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;

  Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;

  Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;

  Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Text: ShortString;
  begin
    Str(v:Len,Text);
    S:=UnicodeString(Text);
  end;
  {$endif CPU16 or CPU8}
  { Convenience overload for a null-terminated Source: forwards to the
    four-argument version with Length(Source) as the character count.
    Returns 0 when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source<>nil then
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source));
  end;
  { Encodes SourceChars UTF-16 code units from Source as UTF-8.

    Dest<>nil: writes at most MaxDestBytes bytes to Dest, always ending with
               a #0 (which replaces the last byte written when the buffer is
               completely filled), and returns the number of bytes stored
               including that terminator.
    Dest=nil : writes nothing and returns the number of bytes the complete
               conversion needs, terminator included.

    Returns 0 when Source is nil, and also when Dest is given with
    MaxDestBytes=0: an empty buffer cannot even take the terminator, so it is
    left untouched (previously one byte was written past its end).
    A high surrogate not followed by a low surrogate, and a low surrogate on
    its own, produce no output. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  {$ifdef EXCLUDE_COMPLEX_PROCS}
  begin
    runerror(217);
  end;
  {$else EXCLUDE_COMPLEX_PROCS}
  var
    i,j : SizeUInt;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    { MaxDestBytes is unsigned: with 0 the "MaxDestBytes-1" clamp below would
      wrap around and Dest[0] would be written outside the buffer }
    if assigned(Dest) and (MaxDestBytes=0) then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            lw:=ord(Source[i]);
            case lw of
              0..$7f:
                begin
                  Dest[j]:=char(lw);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  { stop unless both bytes of the sequence fit }
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (lw shr 6));
                  Dest[j+1]:=char($80 or (lw and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  { stop unless all three bytes fit }
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (lw shr 12));
                  Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
                  Dest[j+2]:=char($80 or (lw and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  { stop unless all four bytes fit }
                  if j+3>=MaxDestBytes then
                    break;
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { $d7c0 is ($d800 - ($10000 shr 10)) }
                      lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;

        { keep the terminator inside the buffer }
        if j>SizeUInt(MaxDestBytes-1) then
          j:=MaxDestBytes-1;

        Dest[j]:=#0;
      end
    else
      begin
        { counting pass: same classification, nothing is stored }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    { +1 accounts for the terminating #0 }
    result:=j+1;
  end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { Convenience overload for a #0-terminated UTF-8 string: the source length
    is taken from length(Source) and invalid sequences are tolerated
    (IgnoreInvalid=True).  Returns 0 when Source is nil. }
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=0;
      if Source<>nil then
        Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True);
    end;
  { Overload without the IgnoreInvalid flag: forwards to the five-argument
    version with IgnoreInvalid=True. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=Utf8ToUnicode(Dest,
                            MaxDestChars,
                            Source,
                            SourceBytes,
                            True);
    end;
  { Converts UTF-8 bytes to UTF-16 code units.

    Dest          - output buffer, or nil to only count the code units.
    MaxDestChars  - capacity of Dest in UTF-16 code units (not consulted when
                    Dest is nil).
    Source        - UTF-8 input; when nil the function returns 0.
    SourceBytes   - number of bytes to read from Source.
    IgnoreInvalid - when False, HandleError(231) is called for every invalid
                    sequence; when True such a sequence silently becomes '?'.

    Returns the number of UTF-16 code units produced (or counted) plus one.
    No terminator is stored in Dest by this routine.

    A code point above $FFFF needs two code units; when Dest has room for
    only one more unit, that sequence is skipped and conversion continues. }
  function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
  {$ifdef EXCLUDE_COMPLEX_PROCS}
    begin
      runerror(217);
    end;
  {$else EXCLUDE_COMPLEX_PROCS}
    const
      { '?' - substituted for every sequence that cannot be decoded }
      UNICODE_INVALID=63;
    var
      InputUTF8: SizeUInt;
      IBYTE: BYTE;
      OutputUnicode: SizeUInt;
      CharLen: SizeUint;
      UC: SizeUInt;

    { Decodes the multi-byte sequence whose lead byte (bit 7 set) is at
      Source[InputUTF8].  SeqLen receives the number of bytes to consume.
      Returns the code point, or UNICODE_INVALID for a malformed, overlong,
      surrogate or out-of-range sequence.  A result >= $10000 is only ever
      returned for a valid four-byte sequence. }
    function DecodeSequence(out SeqLen: SizeUInt): SizeUInt;
      var
        LeadBits: Byte;
        Len, LookAhead: SizeUInt;
      begin
        { the number of leading 1 bits of the lead byte announces the length }
        LeadBits:=byte(Source[InputUTF8]);
        Len:=0;
        while (LeadBits and $80)<>0 do
          begin
            LeadBits:=(LeadBits shl 1) and $FE;
            inc(Len);
          end;
        { All announced bytes must lie inside Source[0..SourceBytes-1];
          otherwise fall back to a single (invalid) byte. }
        if InputUTF8+Len>SourceBytes then
          Len:=1;
        { every trail byte must match 10xxxxxx; if one does not, the sequence
          is cut short at that position and decoded with the shorter length }
        for LookAhead:=1 to Len-1 do
          if (byte(Source[InputUTF8+LookAhead]) and $C0)<>$80 then
            begin
              Len:=LookAhead;
              break;
            end;
        case Len of
          2: begin
               Result:=(byte(Source[InputUTF8]) and $1F) shl 6;
               Result:=Result or (byte(Source[InputUTF8+1]) and $3F);
               { overlong encoding of an ASCII character }
               if Result<=$7F then
                 Result:=UNICODE_INVALID;
             end;
          3: begin
               Result:=(byte(Source[InputUTF8]) and $0F) shl 12;
               Result:=Result or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
               Result:=Result or (byte(Source[InputUTF8+2]) and $3F);
               { overlong, $FFFE/$FFFF, or a surrogate code point }
               if (Result<=$7FF) or (Result>=$FFFE) or
                  ((Result>=$D800) and (Result<=$DFFF)) then
                 Result:=UNICODE_INVALID;
             end;
          4: begin
               Result:=(byte(Source[InputUTF8]) and $07) shl 18;
               Result:=Result or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
               Result:=Result or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
               Result:=Result or (byte(Source[InputUTF8+3]) and $3F);
               { overlong or beyond the last Unicode code point }
               if (Result<$10000) or (Result>$10FFFF) then
                 Result:=UNICODE_INVALID;
             end;
          else
            { 1: stray trail byte or truncated sequence;
              5..8: lead bytes that are not valid in UTF-8 }
            Result:=UNICODE_INVALID;
        end;
        SeqLen:=Len;
      end;

    begin
      if not assigned(Source) then
        begin
          result:=0;
          exit;
        end;
      InputUTF8:=0;
      OutputUnicode:=0;
      if Assigned(Dest) then
        begin
          while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
            begin
              IBYTE:=byte(Source[InputUTF8]);
              if (IBYTE and $80)=0 then
                begin
                  // One character US-ASCII, convert it to unicode
                  Dest[OutputUnicode]:=WideChar(IBYTE);
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  UC:=DecodeSequence(CharLen);
                  if UC>=$10000 then
                    begin
                      { only store the surrogate pair if there is room for
                        both halves; otherwise the sequence is dropped }
                      if OutputUnicode+1<MaxDestChars then
                        begin
                          dec(UC,$10000);
                          Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                          Dest[OutputUnicode+1]:=WideChar((UC and $3ff) + $DC00);
                          inc(OutputUnicode,2);
                        end;
                    end
                  else
                    begin
                      if (UC=UNICODE_INVALID) and not IgnoreInvalid then
                        HandleError(231); // Will be converted to EConversionError in sysutils
                      Dest[OutputUnicode]:=WideChar(UC);
                      inc(OutputUnicode);
                    end;
                  inc(InputUTF8,CharLen);
                end;
            end;
        end
      else
        begin
          { counting only: same decoding, nothing is stored }
          while InputUTF8<SourceBytes do
            begin
              if (byte(Source[InputUTF8]) and $80)=0 then
                begin
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  UC:=DecodeSequence(CharLen);
                  if UC>=$10000 then
                    { surrogate pair }
                    inc(OutputUnicode,2)
                  else
                    begin
                      if (UC=UNICODE_INVALID) and not IgnoreInvalid then
                        HandleError(231); // Will be converted to EConversionError in sysutils
                      inc(OutputUnicode);
                    end;
                  inc(InputUTF8,CharLen);
                end;
            end;
        end;
      Result:=OutputUnicode+1;
    end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { Byte-string overload: the argument is first converted to a UnicodeString
    and then handed to the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
    begin
      Result:=UTF8Encode(
        UnicodeString(s)
      );
    end;
  {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  { Encodes a UnicodeString as UTF-8.  Returns '' for an empty input or when
    UnicodeToUtf8 reports that nothing was produced. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
    var
      produced : SizeInt;
      buf : UTF8String;
    begin
      result:='';
      if s<>'' then
        begin
          { three bytes per UTF-16 code unit is the working buffer size }
          SetLength(buf,length(s)*3);
          { +1: the string's implicit terminator slot is offered as well }
          produced:=UnicodeToUtf8(pchar(buf),length(buf)+1,PUnicodeChar(s),length(s));
          if produced>0 then
            begin
              { the returned count includes the terminating #0 }
              SetLength(buf,produced-1);
              result:=buf;
            end;
        end;
    end;
  {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Decodes UTF-8 bytes into a UnicodeString.  Returns '' for an empty input
    or when Utf8ToUnicode reports that nothing was produced. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
    var
      produced : SizeInt;
      buf : UnicodeString;
    begin
      result:='';
      if s<>'' then
        begin
          { one UTF-16 code unit per input byte is the working buffer size }
          SetLength(buf,length(s));
          produced:=Utf8ToUnicode(PUnicodeChar(buf),length(buf)+1,pchar(s),length(s));
          if produced>0 then
            begin
              { the returned count is one more than the units written }
              SetLength(buf,produced-1);
              result:=buf;
            end;
        end;
    end;
  {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Returns s encoded as UTF-8; simply forwards to Utf8Encode. }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      AnsiToUtf8:=Utf8Encode(s);
    end;
  { Decodes the UTF-8 bytes in s with Utf8Decode and converts the resulting
    UnicodeString back to a byte string. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Utf8ToAnsi:=RawByteString(Utf8Decode(s));
    end;
  1879. {$ifdef FPC_HAS_FEATURE_DYNARRAYS}
  { Converts len UTF-16 code units starting at p into a UCS4String.
    res receives one element per code point plus a terminating 0 element.
    A high surrogate followed (within len) by a low surrogate is combined
    into one code point; any other surrogate is copied unchanged. }
  procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
    var
      src, dst: sizeint;
      cu: longint;

    { True when p[idx] is a high surrogate and p[idx+1] is an in-range low
      surrogate. }
    function IsPairAt(idx: sizeint): boolean;
      begin
        IsPairAt:=(p[idx]>=#$d800) and
                  (p[idx]<=#$dbff) and
                  (idx+1<len) and
                  (p[idx+1]>=#$dc00) and
                  (p[idx+1]<=#$dfff);
      end;

    begin
      { pass 1: count the code points }
      dst:=0;
      src:=0;
      while src<len do
        begin
          if IsPairAt(src) then
            inc(src,2)
          else
            inc(src);
          inc(dst);
        end;
      SetLength(res,dst+1); { +1 for null termination }
      { pass 2: convert }
      dst:=0;
      src:=0;
      while src<len do
        begin
          cu:=ord(p[src]);
          if IsPairAt(src) then
            begin
              { $d7c0 = $d800 - ($10000 shr 10), so this also adds $10000 }
              res[dst]:=(UCS4Char(cu-$d7c0) shl 10)+(UCS4Char(p[src+1]) xor $dc00);
              inc(src,2);
            end
          else
            begin
              { BMP character, or an unpaired surrogate kept as is }
              res[dst]:=cu;
              inc(src);
            end;
          inc(dst);
        end;
      res[dst]:=0;
    end;
  {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  { Converts a UnicodeString to a UCS4String by passing its characters and
    length to UCS4Encode. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
    begin
      UCS4Encode(PWideChar(s),
                 Length(s),
                 result);
    end;
  {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  { Converts a WideString to a UCS4String by passing its characters and
    length to UCS4Encode. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
    begin
      UCS4Encode(PWideChar(s),
                 Length(s),
                 result);
    end;
  {$endif FPC_HAS_WIDESTR_TO_WIDESTR_TO_UCS4STRING}
  1939. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1940. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  { Writes the UTF-16 form of s to dest.
    dest must point to a previously allocated wide/unicodestring that is
    large enough; no length check is done here.  The last element of s is
    treated as the terminator and is not converted.  Values above $10ffff
    are written as '?'. }
  procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
    var
      idx, last: sizeint;
      cp: UCS4Char;
    begin
      last:=length(s)-2; { -2 because s contains explicit terminating #0 }
      idx:=0;
      while idx<=last do
        begin
          cp:=s[idx];
          if cp<=$ffff then
            begin
              dest^:=widechar(cp);
              inc(dest);
            end
          else if dword(cp)<=$10ffff then
            begin
              { $d7c0 = $d800 - ($10000 shr 10) }
              dest[0]:=widechar(cp shr 10 + $d7c0);
              { subtracting $10000 doesn't change low 10 bits }
              dest[1]:=widechar(cp and $3ff + $dc00);
              inc(dest,2);
            end
          else
            begin
              { invalid code point }
              dest^:='?';
              inc(dest);
            end;
          inc(idx);
        end;
    end;
  { Converts a UCS4String (whose last element is the terminator) to a
    UnicodeString: first the required number of UTF-16 code units is counted,
    then UCS4Decode fills the result. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      for idx:=0 to length(s)-2 do { skip terminating #0 }
        if (s[idx]>$ffff) and (cardinal(s[idx])<=$10ffff) then
          { needs a surrogate pair }
          inc(units,2)
        else
          inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  { Converts a UCS4String (whose last element is the terminator) to a
    WideString: first the required number of UTF-16 code units is counted,
    then UCS4Decode fills the result. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      for idx:=0 to length(s)-2 do { skip terminating #0 }
        if (s[idx]>$ffff) and (cardinal(s[idx])<=$10ffff) then
          { needs a surrogate pair }
          inc(units,2)
        else
          inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  1986. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1987. {$endif FPC_HAS_FEATURE_DYNARRAYS}
  1988. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  const
    { messages written by unimplementedunicodestring }
    SNoUnicodestrings = 'This binary has no string conversion support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';

  { Called by the stub string-manager routines.  When console I/O is
    compiled in, no widestring manager is available and IsConsole is set,
    both messages above are written to StdErr.  In every configuration
    run-time error 234 is then raised through HandleErrorAddrFrameInd. }
  procedure unimplementedunicodestring;
    begin
  {$if defined(FPC_HAS_FEATURE_CONSOLEIO) and not defined(HAS_WIDESTRINGMANAGER)}
      if IsConsole then
        begin
          Writeln(StdErr,SNoUnicodestrings);
          Writeln(StdErr,SRecompileWithUnicodestrings);
        end;
  {$endif}
      HandleErrorAddrFrameInd(234{RuntimeErrorExitCodes[reCodesetConversion]},get_pc_addr,get_frame);
    end;
  { Returns the ElementSize field of the string header located
    UnicodeFirstOff bytes before the character data, or SizeOf(UnicodeChar)
    for an empty (nil) string. }
  function StringElementSize(const S: UnicodeString): Word; overload;
    begin
      Result:=SizeOf(UnicodeChar);
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
    end;
  { Returns the Ref field of the string header located UnicodeFirstOff bytes
    before the character data, or 0 for an empty (nil) string. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
    begin
      Result:=0;
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
    end;
  { Returns DefaultUnicodeCodePage, except that with FPC_HAS_CPSTRING defined
    a non-empty string reports the CodePage field stored in its header. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
    begin
      Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
    end;
  { Placeholder implementations for string-manager entries.  Each one only
    calls unimplementedunicodestring and never assigns its function result,
    which is why warnings are switched off around them. }
  {$push}
  {$warnings off}

  { stub for the upper/lower case conversion of a UnicodeString }
  function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring();
    end;

  { stub for the comparison of two UnicodeStrings }
  function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
    begin
      unimplementedunicodestring();
    end;

  { stub for the upper/lower case conversion of a WideString }
  function StubWideCase(const s: WideString): WideString;
    begin
      unimplementedunicodestring();
    end;

  { stub for the comparison of two WideStrings }
  function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
    begin
      unimplementedunicodestring();
    end;

  {$pop}
  { Fills widestringmanager with the built-in default routines and, for
    operations that have no built-in implementation, with the Stub* routines.
    The conversion, case and code page entries are only set when
    HAS_WIDESTRINGMANAGER is not defined. }
  procedure initunicodestringmanager;
    begin
      with widestringmanager do
        begin
  {$ifndef HAS_WIDESTRINGMANAGER}
          { WideString entries }
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
          Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
          Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
          Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
          UpperWideStringProc:=@StubWideCase;
          LowerWideStringProc:=@StubWideCase;

          { UnicodeString entries }
          Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
          Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
          UpperUnicodeStringProc:=@StubUnicodeCase;
          LowerUnicodeStringProc:=@StubUnicodeCase;

          GetStandardCodePageProc:=@DefaultGetStandardCodePage;
  {$endif HAS_WIDESTRINGMANAGER}
          { entries that are set in every configuration }
          CompareWideStringProc:=@StubCompareWideString;
          // CompareTextWideStringProc:=@StubCompareWideString;
          CompareUnicodeStringProc:=@StubCompareUnicodeString;
          CharLengthPCharProc:=@DefaultCharLengthPChar;
          CodePointLengthProc:=@DefaultCodePointLength;
        end;
    end;
  2070. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  { Converts Str to a byte string in DefaultFileSystemCodePage using the
    installed Unicode2AnsiMoveProc of widestringmanager. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
    var
      CharCount: SizeInt;
    Begin
      CharCount:=Length(Str);
      widestringmanager.Unicode2AnsiMoveProc(punicodechar(Str),
                                             Result,
                                             DefaultFileSystemCodePage,
                                             CharCount);
    End;
  {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Converts the characters of arr, up to but not including the first #0, to
    a byte string in DefaultFileSystemCodePage using the installed
    Unicode2AnsiMoveProc of widestringmanager.  When arr contains no #0 the
    whole array is converted. }
  Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
    var
      CharCount: SizeInt;
    Begin
      { Search for the terminator inside the array only.  An unbounded
        length(pwidechar(...)) scan would read past the end of an array that
        carries no #0. }
      CharCount:=IndexWord(arr,Length(arr),0);
      if CharCount<0 then
        CharCount:=Length(arr);
      widestringmanager.Unicode2AnsiMoveProc(@arr[0],Result,
        DefaultFileSystemCodePage,CharCount);
    End;
  {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Returns a copy of Str on which SetCodePage(..., DefaultFileSystemCodePage,
    True) has been applied. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
    Begin
      ToSingleByteFileSystemEncodedFileName:=Str;
      SetCodePage(ToSingleByteFileSystemEncodedFileName,
                  DefaultFileSystemCodePage,
                  True);
    End;
  { Delphi compatibility: the bytes of S are always interpreted as UTF-8,
    whatever code page the string carries.  Forwards to UTF8Decode. }
  function UTF8ToString(const S: RawByteString): UnicodeString; inline;
    begin
      UTF8ToString:=UTF8Decode(S);
    end;
  { ShortString overload: S is first assigned to a RawByteString, which is
    then decoded as UTF-8 by UTF8Decode. }
  function UTF8ToString(const S: ShortString): UnicodeString;
    var
      bytes: RawByteString;
    begin
      bytes:=S;
      UTF8ToString:=UTF8Decode(bytes);
    end;
  { PAnsiChar overload: copies the #0-terminated bytes at S into a
    RawByteString and decodes them as UTF-8. }
  function UTF8ToString(const S: PAnsiChar): UnicodeString;
    var
      rs: RawByteString;
      { SizeInt, the type returned by length, so that long inputs are not
        truncated by a narrower integer type }
      Count: SizeInt;
    begin
      Count := length(S);
      SetLength(rs, Count);
      if Count > 0 then
        fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
      Result := UTF8ToString(rs);
    end;
  2116. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2117. are as well }
  2118. {$ifndef CPUJVM}
  { array of AnsiChar overload: copies all elements of S into a
    RawByteString and decodes them as UTF-8. }
  function UTF8ToString(const S: array of AnsiChar): UnicodeString;
    var
      rs: RawByteString;
      { SizeInt, the type returned by Length, so that large arrays are not
        truncated by a narrower integer type }
      Count: SizeInt;
    begin
      Count := Length(S);
      SetLength(rs, Count);
      if Count > 0 then
        fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
      Result := UTF8ToString(rs);
    end;
  { array of Byte overload: copies all elements of S into a RawByteString
    and decodes them as UTF-8. }
  function UTF8ToString(const S: array of Byte): UnicodeString;
    var
      rs: RawByteString;
      { SizeInt, the type returned by Length, so that large arrays are not
        truncated by a narrower integer type }
      Count: SizeInt;
    begin
      Count := Length(S);
      SetLength(rs, Count);
      if Count > 0 then
        fpc_pchar_ansistr_intern_charmove(pchar(@S),Low(S),rs,0,Count);
      Result := UTF8ToString(rs);
    end;
  2141. {$endif not CPUJVM}