ustrings.inc 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  13. {$define FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to (S = SizeOf(SizeInt)) :
  19. @-2*S : SizeInt for reference count;
  20. @-S : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  Type
    PUnicodeRec = ^TUnicodeRec;

    { Header record of a UnicodeString. The routines in this file reach it by
      subtracting UnicodeFirstOff from the string pointer, i.e. it sits directly
      in front of the character data. }
    TUnicodeRec = Record
      CodePage    : TSystemCodePage;  { set to DefaultUnicodeCodePage by NewUnicodeString }
      ElementSize : Word;             { set to SizeOf(UnicodeChar) by NewUnicodeString }
  {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
  {$endif CPU64}
      Ref         : SizeInt;          { reference count; negative marks a constant string }
      Len         : SizeInt;          { length in characters, not bytes }
    end;

  Const
    { Distance in bytes between the start of the header and the first character }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  42. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
  43. {
  44. Default UnicodeChar -> Char conversion copies values below 256 unchanged
  45. and translates all others to '?'; Char -> UnicodeChar widens every byte.
  46. These routines can be overridden for the Current Locale
  47. }
  48. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  49. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UnicodeChar -> Char conversion.
    Resizes dest to len, tags it with code page cp (without converting) and
    copies len characters from source: values below 256 are copied as is,
    everything else becomes '?'.
  }
  var
    remaining : SizeInt;
    outp      : PAnsiChar;
    w         : word;
  begin
    setlength(dest,len);
    { an empty destination has no buffer to tag or fill }
    if pointer(dest)=nil then
      exit;
    SetCodePage(dest,cp,false);
    outp:=pointer(dest);         {SetLength guarantees that dest is unique}
    remaining:=len;
    while remaining>0 do
      begin
        w:=word(source^);
        if w>=256 then
          outp^:='?'
        else
          outp^:=char(w);
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  70. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  71. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  72. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback Char -> UnicodeChar conversion.
    Resizes dest to len and widens each of the len source bytes to a
    UnicodeChar with the same ordinal value. cp is not consulted.
  }
  var
    remaining : SizeInt;
    outp      : PUnicodeChar;
  begin
    setlength(dest,len);
    outp:=pointer(dest);         {SetLength guarantees that dest is unique}
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=unicodechar(byte(source^));
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  87. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  88. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  { Default implementation: the character count equals length(Str). }
  begin
    Result:=length(Str);
  end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  { Default implementation: 0 when Str points at a #0, otherwise 1.
    MaxLookAead is not used. }
  begin
    if str[0]=#0 then
      Result:=0
    else
      Result:=1;
  end;
  100. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  { Returns DefaultFileSystemCodePage for scpFileSystemSingleByte and
    DefaultSystemCodePage for every other value. }
  begin
    { don't raise an exception here. We need this for text file handling }
    if stdcp=scpFileSystemSingleByte then
      { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
        without a fully functional widestring manager that will probably cause
        more problems than it solves }
      Result:=DefaultFileSystemCodePage
    else
      Result:=DefaultSystemCodePage;
  end;
  Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { Installs New as the active string manager and returns the previous one in Old. }
  begin
    { save first, then replace: the order matters }
    Old:=widestringmanager;
    widestringmanager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  { Installs New as the active string manager; the previous one is discarded. }
  begin
    widestringmanager:=New;
  end;
  Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  { Same operation as GetUnicodeStringManager: both read widestringmanager. }
  begin
    GetUnicodeStringManager(Manager);
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { Same operation as the two-parameter SetUnicodeStringManager: returns the
    previous manager in Old and installs New. }
  begin
    SetUnicodeStringManager(New,Old);
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  { Same operation as the one-parameter SetUnicodeStringManager: installs New. }
  begin
    SetUnicodeStringManager(New);
  end;
  138. {****************************************************************************
  139. Internal functions, not in interface.
  140. ****************************************************************************}
  procedure UnicodeStringError;
  { Reports run-time error 204 at the current code address and stack frame. }
  begin
    HandleErrorAddrFrameInd(204,get_pc_addr,get_frame);
  end;
  145. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  146. {$define FPC_HAS_NEW_UNICODESTRING}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString on the heap with room for Len characters
    plus a terminator. The header gets length Len, reference count 1,
    DefaultUnicodeCodePage and the UnicodeChar element size; a #0 is written
    at the first character position. Returns a pointer to the character data.
    Calls UnicodeStringError if the allocation yields nil.
  }
  Var
    Block : Pointer;
    Hdr   : PUnicodeRec;
  begin
    GetMem(Block,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
    if Block=Nil then
      UnicodeStringError
    else
      begin
        Hdr:=PUnicodeRec(Block);
        Hdr^.Len:=Len;                              { Initial length }
        Hdr^.Ref:=1;                                { Initial Refcount }
        Hdr^.CodePage:=DefaultUnicodeCodePage;
        Hdr^.ElementSize:=SizeOf(UnicodeChar);
        inc(Block,UnicodeFirstOff);                 { Points to string now }
        PUnicodeChar(Block)^:=#0;                   { Terminating #0 }
      end;
    NewUnicodeString:=Block;
  end;
  169. {$endif FPC_HAS_NEW_UNICODESTRING}
  170. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  171. {$define FPC_HAS_UNICODESTR_DECR_REF}
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Releases one reference to the unicodestring S and sets S to nil.
    Strings with a negative reference count (constants) are left alone;
    otherwise the count is decremented and the block is freed when it
    reaches zero.
  }
  Var
    Hdr : PUnicodeRec;
  Begin
    { Zero string: nothing to release }
    if S<>Nil then
      begin
        Hdr:=PUnicodeRec(S-UnicodeFirstOff);
        S:=nil;
        { skip constant strings }
        if Hdr^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(Hdr^.Ref) then
            FreeMem(Hdr);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  194. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  195. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  196. {$define FPC_HAS_UNICODESTR_INCR_REF}
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S. Nil strings and strings with
    a negative reference count (constants) are left untouched.
  }
  Var
    Hdr : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        Hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { constant string ? }
        if Hdr^.Ref>=0 then
          inclocked(Hdr^.Ref);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  208. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  209. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  210. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage. At most high(res) source
    characters are converted.
  }
  Var
    SrcLen    : SizeInt;
    converted : ansistring;
  begin
    res:='';
    SrcLen:=Length(S2);
    if SrcLen<=0 then
      exit;
    { clamp to the capacity of the destination }
    if SrcLen>high(res) then
      SrcLen:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),converted,DefaultSystemCodePage,SrcLen);
    res:=converted;
  end;
  229. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  230. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  231. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
  }
  Var
    SrcLen : SizeInt;
  begin
    result:='';
    SrcLen:=Length(S2);
    if SrcLen<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,SrcLen);
  end;
  244. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  245. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  246. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString in code page cp
    (DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined).
    cp is passed through TranslatePlaceholderCP before the conversion.
  }
  Var
    SrcLen : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp     : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    SrcLen:=Length(S2);
    if SrcLen<=0 then
      exit;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,SrcLen);
  end;
  268. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  269. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  270. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString. The source code page is taken
    from StringCodePage(S2) and passed through TranslatePlaceholderCP.
  }
  Var
    SrcLen : SizeInt;
    SrcCP  : TSystemCodePage;
  begin
    result:='';
    SrcLen:=Length(S2);
    if SrcLen<=0 then
      exit;
    SrcCP:=TranslatePlaceholderCP(StringCodePage(S2));
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SrcCP,result,SrcLen);
  end;
  287. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  288. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  289. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of a UnicodeString into a WideString of the same length. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  295. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  296. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  297. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of a WideString into a UnicodeString of the same length. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  303. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  304. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  305. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from a zero-terminated wide character buffer.
    A nil pointer yields the empty string.
  }
  var
    CharCount : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        { index of the terminating 0 = number of characters }
        CharCount:=IndexWord(p^, -1, 0);
        Setlength(result,CharCount);
        if CharCount>0 then
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
      end;
  end;
  318. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  319. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  320. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero-terminated wide character buffer to an AnsiString in code
    page cp (DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined).
    A nil pointer or an empty buffer yields the empty string.
  }
  var
    Size : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p=nil then
      exit;
    { index of the terminating 0 = number of characters }
    Size := IndexWord(p^, -1, 0);
    if Size>0 then
      begin
        { translate cp first, as fpc_UnicodeStr_To_AnsiStr and
          fpc_UChar_To_AnsiStr do, so the move procedure gets the same
          kind of code page value from all three entry points }
        cp:=TranslatePlaceholderCP(cp);
        widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
      end;
  end;
  338. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  339. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  340. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts a zero-terminated wide character buffer to a ShortString using
    the installed Wide2AnsiMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty buffer yields the empty string.
  }
  var
    CharCount : SizeInt;
    converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, high(PtrInt), 0);
        if CharCount>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,converted,DefaultSystemCodePage,CharCount);
            res:=converted;
          end;
      end;
  end;
  356. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  { Define the symbol that the guard tests, as every other guard in this file
    does. The previously defined name is kept as well in case something
    depends on it. }
  {$define FPC_HAS_UNICODESTR_ASSIGN}
  {$define FPC_UNICODESTR_ASSIGN}
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2), taking in account reference counts.
    The new value is referenced before the old one is released, so assigning
    a string to itself cannot free it. Constant strings (reference count not
    above zero) are not counted.
  }
  begin
    If S2<>nil then
      If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
        inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    s1:=s2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  {$endif FPC_HAS_UNICODESTR_ASSIGN}
  375. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  376. {$define FPC_HAS_UNICODESTR_CONCAT}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1+S2 in DestS. DestS may be the same string as S1 and/or S2;
    each aliasing case is handled separately because SetLength on DestS may
    move the data the const parameters point to.
  }
  Var
    Len1,Len2  : SizeInt;
    selfAppend : boolean;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { appending to DestS; remember beforehand whether S2 is DestS too }
        selfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len2+Len1);
        if selfAppend then
          { source is the resized DestS itself: copy the characters only }
          Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2)*sizeof(UnicodeChar))
        else
          Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { prepending: shift the old contents up, then put S1 in front }
        SetLength(DestS,Len2+Len1);
        Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
      end
    else
      begin
        { no aliasing: start from an empty string and copy both parts }
        DestS:='';
        SetLength(DestS,Len2+Len1);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  419. {$endif FPC_HAS_UNICODESTR_CONCAT}
  420. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  421. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all elements of sarr, in order, in DestS.
    DestS may itself be one or more of the elements. An empty array yields
    the empty string; a single-element array yields that element.
  }
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    lowstart    : longint;
    destcopy    : pointer;
    OldDestLen  : SizeInt;
  begin
    { no operands: high(sarr) is -1, so sarr[low(sarr)] must not be touched }
    if high(sarr)<low(sarr) then
      begin
        DestS:='';
        exit;
      end;
    { exactly one operand: the result is that operand, not the empty string }
    if high(sarr)=low(sarr) then
      begin
        DestS:=sarr[low(sarr)];
        exit;
      end;
    destcopy:=nil;
    lowstart:=low(sarr);
    { DestS is the first operand: its contents can stay in place and the
      remaining operands can be appended }
    if Pointer(DestS)=Pointer(sarr[lowstart]) then
      inc(lowstart);
    { Check for another reuse, then we can't use
      the append optimization }
    for i:=lowstart to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[i]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. Therefor
              this optimization is disabled for WINLIKEUNICODESTRING }
            destcopy:=pointer(dests);
            fpc_UnicodeStr_Incr_Ref(destcopy);
            lowstart:=low(sarr);
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if lowstart=low(sarr) then
      DestS:='';
    OldDestLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestS,NewLen);
    { Concat all strings, except the string we already
      copied in DestS }
    pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
    for i:=lowstart to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(unicodestring(p));
            { Size+1 also copies the terminating #0 of the source }
            Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
            inc(pc,size*sizeof(UnicodeChar));
          end;
      end;
    { release the extra reference taken above (no-op when destcopy is nil) }
    fpc_UnicodeStr_Decr_Ref(destcopy);
  end;
  483. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  484. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  485. {$define FPC_HAS_CHAR_TO_UCHAR}
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts a Char to a UnicodeChar using the installed Ansi2UnicodeMoveProc
    and DefaultSystemCodePage; returns the first character of the result.
  }
  var
    converted : unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,converted,1);
    Result:=converted[1];
  end;
  493. {$endif FPC_HAS_CHAR_TO_UCHAR}
  494. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  495. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Converts a Char to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
  }
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,Result,1);
  end;
  503. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  504. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  505. {$define FPC_HAS_UCHAR_TO_CHAR}
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char using the installed Unicode2AnsiMoveProc
    and DefaultSystemCodePage. Returns '?' unless the conversion produced
    exactly one character.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    if length(converted)<>1 then
      Result:='?'
    else
      Result:=converted[1];
  end;
  519. {$endif FPC_HAS_UCHAR_TO_CHAR}
  520. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  521. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  {$ifdef VER2_6}
  procedure fpc_UChar_To_ShortStr(out result : shortstring;const c : WideChar); compilerproc;
  {$else}
  function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  {$endif}
  {
    Converts a WideChar to a ShortString using the installed
    Wide2AnsiMoveProc and DefaultSystemCodePage.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,converted,DefaultSystemCodePage,1);
    result:=converted;
  end;
  536. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  537. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  538. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Returns a one-character UnicodeString holding c.
  }
  begin
    Setlength(Result,1);
    Result[1]:=c;
  end;
  547. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  548. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  549. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeChar to an AnsiString in code page cp
    (DefaultSystemCodePage when FPC_HAS_CPSTRING is not defined).
    cp is passed through TranslatePlaceholderCP before the conversion.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  565. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  566. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  567. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a zero-terminated Char buffer to a UnicodeString using the
    installed Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty buffer yields the empty string.
  }
  Var
    ByteCount : SizeInt;
  begin
    if assigned(p) then
      if p[0]<>#0 then
        begin
          { index of the terminating #0 = number of bytes to convert }
          ByteCount:=IndexChar(p^,-1,#0);
          widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,Result,ByteCount);
          exit;
        end;
    Result:='';
  end;
  580. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  581. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  582. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a Char array to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage. When zerobased is true
    the conversion stops at the first #0 in the array; otherwise the whole
    array is converted.
  }
  var
    Count,ZeroPos : SizeInt;
  begin
    { default: convert every element }
    Count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 Then
          begin
            Result:='';
            exit;
          end;
        ZeroPos:=IndexChar(arr,high(arr)+1,#0);
        { -1 means no terminator was found: keep the full length }
        if ZeroPos<>-1 then
          Count:=ZeroPos;
      end;
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,Result,Count);
  end;
  602. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  603. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  604. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a WideChar array into a UnicodeString. When zerobased is true the
    copy stops at the first 0 element; otherwise the whole array is copied.
  }
  var
    Count,ZeroPos : SizeInt;
  begin
    { default: copy every element }
    Count:=high(arr)+1;
    if zerobased then
      begin
        ZeroPos:=IndexWord(arr,high(arr)+1,0);
        { -1 means no terminator was found: keep the full length }
        if ZeroPos<>-1 then
          Count:=ZeroPos;
      end;
    SetLength(Result,Count);
    Move(arr[0], Pointer(Result)^,Count*sizeof(WideChar));
  end;
  620. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  621. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  622. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  623. { due to their names, the following procedures should be in wstrings.inc,
  624. however, the compiler generates code using this functions on all platforms }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a WideChar array to a ShortString using the installed
    Wide2AnsiMoveProc and DefaultSystemCodePage. At most high(res) elements
    are considered; when zerobased is true the conversion also stops at the
    first 0 element within that range.
  }
  var
    limit     : longint;
    zeroPos   : ptrint;
    len       : byte;
    converted : ansistring;
  begin
    { clamp the element count to 0..high(res) }
    limit := high(arr)+1;
    if limit>=high(res)+1 then
      limit:=high(res)
    else if limit<0 then
      limit:=0;
    len:=limit;
    if zerobased then
      begin
        zeroPos:=IndexWord(arr[0],limit,0);
        { a negative index means no terminator inside the clamped range }
        if zeroPos>=0 then
          len:=zeroPos;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),converted,DefaultSystemCodePage,len);
    res:=converted;
  end;
  650. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  651. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  652. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  653. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  654. var
  655. i : SizeInt;
  656. {$ifndef FPC_HAS_CPSTRING}
  657. cp : TSystemCodePage;
  658. {$endif FPC_HAS_CPSTRING}
  659. begin
  660. {$ifndef FPC_HAS_CPSTRING}
  661. cp:=DefaultSystemCodePage;
  662. {$endif FPC_HAS_CPSTRING}
  663. if (zerobased) then
  664. begin
  665. i:=IndexWord(arr,high(arr)+1,0);
  666. if i = -1 then
  667. i := high(arr)+1;
  668. end
  669. else
  670. i := high(arr)+1;
  671. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),RawByteString(fpc_WideCharArray_To_AnsiStr),cp,i);
  672. end;
  673. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  674. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  675. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  676. Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  677. var
  678. i : SizeInt;
  679. begin
  680. if (zerobased) then
  681. begin
  682. i:=IndexWord(arr,high(arr)+1,0);
  683. if i = -1 then
  684. i := high(arr)+1;
  685. end
  686. else
  687. i := high(arr)+1;
  688. SetLength(fpc_WideCharArray_To_WideStr,i);
  689. Move(arr[0], Pointer(fpc_WideCharArray_To_WideStr)^,i*sizeof(WideChar));
  690. end;
  691. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  692. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  693. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  694. procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  695. var
  696. len: SizeInt;
  697. temp: ansistring;
  698. begin
  699. len := length(src);
  700. { make sure we don't dereference src if it can be nil (JM) }
  701. if len > 0 then
  702. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
  703. len := length(temp);
  704. if len > length(res) then
  705. len := length(res);
  706. {$push}
  707. {$r-}
  708. move(temp[1],res[0],len);
  709. fillchar(res[len],length(res)-len,0);
  710. {$pop}
  711. end;
  712. {$endif FPC_HAS_UNICODESTR_TO_UNICODECHARARRAY}
  713. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  714. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  715. procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  716. var
  717. len: SizeInt;
  718. temp: widestring;
  719. begin
  720. len := length(src);
  721. { make sure we don't dereference src if it can be nil (JM) }
  722. if len > 0 then
  723. widestringmanager.ansi2widemoveproc(pchar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),temp,len);
  724. len := length(temp);
  725. if len > length(res) then
  726. len := length(res);
  727. {$push}
  728. {$r-}
  729. move(temp[1],res[0],len*sizeof(widechar));
  730. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  731. {$pop}
  732. end;
  733. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  734. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  735. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  736. procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  737. var
  738. len: longint;
  739. temp : widestring;
  740. begin
  741. len := length(src);
  742. { make sure we don't access char 1 if length is 0 (JM) }
  743. if len > 0 then
  744. widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  745. len := length(temp);
  746. if len > length(res) then
  747. len := length(res);
  748. {$push}
  749. {$r-}
  750. move(temp[1],res[0],len*sizeof(widechar));
  751. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  752. {$pop}
  753. end;
  754. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  755. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  756. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  757. procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  758. var
  759. len: SizeInt;
  760. begin
  761. len := length(src);
  762. if len > length(res) then
  763. len := length(res);
  764. {$push}
  765. {$r-}
  766. { make sure we don't try to access element 1 of the widestring if it's nil }
  767. if len > 0 then
  768. move(src[1],res[0],len*SizeOf(WideChar));
  769. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  770. {$pop}
  771. end;
  772. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  773. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  774. {$define FPC_HAS_UNICODESTR_COMPARE}
  775. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  776. {
  777. Compares 2 UnicodeStrings;
  778. The result is
  779. <0 if S1<S2
  780. 0 if S1=S2
  781. >0 if S1>S2
  782. }
  783. Var
  784. MaxI,Temp : SizeInt;
  785. begin
  786. if pointer(S1)=pointer(S2) then
  787. begin
  788. fpc_UnicodeStr_Compare:=0;
  789. exit;
  790. end;
  791. Maxi:=Length(S1);
  792. temp:=Length(S2);
  793. If MaxI>Temp then
  794. MaxI:=Temp;
  795. Temp:=CompareWord(S1[1],S2[1],MaxI);
  796. if temp=0 then
  797. temp:=Length(S1)-Length(S2);
  798. fpc_UnicodeStr_Compare:=Temp;
  799. end;
  800. {$endif FPC_HAS_UNICODESTR_COMPARE}
  801. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  802. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  803. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  804. {
  805. Compares 2 UnicodeStrings for equality only;
  806. The result is
  807. 0 if S1=S2
  808. <>0 if S1<>S2
  809. }
  810. Var
  811. MaxI : SizeInt;
  812. begin
  813. if pointer(S1)=pointer(S2) then
  814. exit(0);
  815. Maxi:=Length(S1);
  816. If MaxI<>Length(S2) then
  817. exit(-1)
  818. else
  819. exit(CompareWord(S1[1],S2[1],MaxI));
  820. end;
  821. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  822. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  823. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  824. Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  825. begin
  826. if (p=nil) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) or (Index<1) then
  827. HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  828. end;
  829. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  830. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  831. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  832. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  833. {
  834. Sets The length of string S to L.
  835. Makes sure S is unique, and contains enough room.
  836. }
  837. Var
  838. Temp : Pointer;
  839. movelen: SizeInt;
  840. nl,lens, lena : SizeUInt;
  841. begin
  842. nl:=l;
  843. {$IFDEF VER2_6}
  844. nl:=nl*2;
  845. {$ENDIF}
  846. if (l>0) then
  847. begin
  848. if Pointer(S)=nil then
  849. begin
  850. { Need a complete new string...}
  851. Pointer(s):=NewUnicodeString(nl);
  852. end
  853. else
  854. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  855. begin
  856. Temp:=Pointer(s)-UnicodeFirstOff;
  857. lens:=MemSize(Temp);
  858. lena:=SizeUInt(L*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
  859. if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
  860. begin
  861. reallocmem(Temp, lena);
  862. Pointer(S):=Temp+UnicodeFirstOff;
  863. end;
  864. end
  865. else
  866. begin
  867. { Reallocation is needed... }
  868. Temp:=NewUnicodeString(nL);
  869. if Length(S)>0 then
  870. begin
  871. if l < succ(length(s)) then
  872. movelen := l
  873. { also move terminating null }
  874. else
  875. movelen := succ(length(s));
  876. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  877. end;
  878. fpc_unicodestr_decr_ref(Pointer(S));
  879. Pointer(S):=Temp;
  880. end;
  881. { Force nil termination in case it gets shorter }
  882. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  883. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=nl;
  884. end
  885. else { length=0, deallocate the string }
  886. fpc_unicodestr_decr_ref (Pointer(S));
  887. end;
  888. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  889. {*****************************************************************************
  890. Public functions, In interface.
  891. *****************************************************************************}
  892. function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  893. begin
  894. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  895. end;
  896. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  897. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  898. function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  899. begin
  900. result:=StringToWideChar(Src,Dest,DestSize);
  901. end;
  902. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  903. function WideCharToString(S : PWideChar) : UnicodeString;
  904. begin
  905. result:=WideCharLenToString(s,Length(WideString(s)));
  906. end;
  907. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  908. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  909. function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  910. var
  911. temp: widestring;
  912. Len: SizeInt;
  913. begin
  914. widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  915. Len:=Length(temp);
  916. if DestSize<=Len then
  917. Len:=Destsize-1;
  918. move(temp[1],Dest^,Len*SizeOf(WideChar));
  919. Dest[Len]:=#0;
  920. result:=Dest;
  921. end;
  922. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  923. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  924. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  925. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  926. begin
  927. SetLength(result,Len);
  928. Move(S^,Pointer(Result)^,Len*2);
  929. end;
  930. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  931. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  932. begin
  933. Dest:=UnicodeCharLenToString(Src,Len);
  934. end;
  935. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  936. begin
  937. Dest:=AnsiString(UnicodeCharLenToString(Src,Len));
  938. end;
  939. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  940. begin
  941. Dest:=AnsiString(UnicodeCharToString(S));
  942. end;
  943. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  944. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  945. function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  946. begin
  947. SetLength(result,Len);
  948. Move(S^,Pointer(Result)^,Len*2);
  949. end;
  950. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  951. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  952. begin
  953. Dest:=WideCharLenToString(Src,Len);
  954. end;
  955. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  956. begin
  957. Dest:=AnsiString(WideCharLenToString(Src,Len));
  958. end;
  959. procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  960. begin
  961. Dest:=WideCharToString(S);
  962. end;
  963. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  964. begin
  965. Dest:=AnsiString(WideCharToString(S));
  966. end;
  967. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  968. {$define FPC_HAS_UNICODESTR_UNIQUE}
  969. Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  970. {
  971. Make sure reference count of S is 1,
  972. using copy-on-write semantics.
  973. }
  974. Var
  975. SNew : Pointer;
  976. L : SizeInt;
  977. begin
  978. pointer(result) := pointer(s);
  979. If Pointer(S)=Nil then
  980. exit;
  981. if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
  982. begin
  983. L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
  984. SNew:=NewUnicodeString (L);
  985. Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
  986. PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L;
  987. fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
  988. pointer(S):=SNew;
  989. pointer(result):=SNew;
  990. end;
  991. end;
  992. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  993. {$ifndef FPC_HAS_UNICODESTR_COPY}
  994. {$define FPC_HAS_UNICODESTR_COPY}
  995. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  996. var
  997. ResultAddress : Pointer;
  998. begin
  999. ResultAddress:=Nil;
  1000. dec(index);
  1001. if Index < 0 then
  1002. Index := 0;
  1003. { Check Size. Accounts for Zero-length S, the double check is needed because
  1004. Size can be maxint and will get <0 when adding index }
  1005. if (Size>Length(S)) or
  1006. (Index+Size>Length(S)) then
  1007. Size:=Length(S)-Index;
  1008. If Size>0 then
  1009. begin
  1010. ResultAddress:=NewUnicodeString(Size);
  1011. Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
  1012. PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size;
  1013. PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
  1014. end;
  1015. fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
  1016. Pointer(fpc_unicodestr_Copy):=ResultAddress;
  1017. end;
  1018. {$endif FPC_HAS_UNICODESTR_COPY}
  1019. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1020. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1021. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  1022. var
  1023. i,MaxLen : SizeInt;
  1024. pc : punicodechar;
  1025. begin
  1026. Pos:=0;
  1027. if (Length(SubStr)>0) and (Offset>0) and (Offset<=Length(Source)) then
  1028. begin
  1029. MaxLen:=Length(source)-Length(SubStr)-(OffSet-1);
  1030. i:=0;
  1031. pc:=@source[OffSet];
  1032. while (i<=MaxLen) do
  1033. begin
  1034. inc(i);
  1035. if (SubStr[1]=pc^) and
  1036. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  1037. begin
  1038. Pos:=Offset+i-1;
  1039. exit;
  1040. end;
  1041. inc(pc);
  1042. end;
  1043. end;
  1044. end;
  1045. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1046. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1047. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1048. { Faster version for a unicodechar alone }
  1049. Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  1050. var
  1051. i: SizeInt;
  1052. pc : punicodechar;
  1053. begin
  1054. if (Offset>0) and (Offset<=length(s)) then
  1055. begin
  1056. pc:=@s[OffSet];
  1057. for i:=OffSet to length(s) do
  1058. begin
  1059. if pc^=c then
  1060. begin
  1061. pos:=i;
  1062. exit;
  1063. end;
  1064. inc(pc);
  1065. end;
  1066. end;
  1067. pos:=0;
  1068. end;
  1069. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1070. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1071. block, which is significant bloat without any sensible speed improvement. }
  1072. Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  1073. begin
  1074. result:=Pos(UnicodeString(c),s,offset);
  1075. end;
  1076. Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  1077. begin
  1078. result:=Pos(UnicodeString(c),s,OffSet);
  1079. end;
  1080. Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  1081. begin
  1082. result:=Pos(c,UnicodeString(s),OffSet);
  1083. end;
  1084. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1085. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1086. { Faster version for a char alone. Must be implemented because }
  1087. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1088. { using pos(char,pchar) will always call the shortstring version }
  1089. { (exact match for first argument), also with $h+ (JM) }
  1090. Function Pos (c : Char; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  1091. var
  1092. i: SizeInt;
  1093. wc : unicodechar;
  1094. pc : punicodechar;
  1095. begin
  1096. if (Offset>0) and (Offset<=Length(S)) then
  1097. begin
  1098. wc:=c;
  1099. pc:=@s[OffSet];
  1100. for i:=OffSet to length(s) do
  1101. begin
  1102. if pc^=wc then
  1103. begin
  1104. pos:=i;
  1105. exit;
  1106. end;
  1107. inc(pc);
  1108. end;
  1109. end;
  1110. pos:=0;
  1111. end;
  1112. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1113. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1114. {$define FPC_HAS_DELETE_UNICODESTR}
  1115. Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  1116. Var
  1117. LS : SizeInt;
  1118. begin
  1119. LS:=Length(S);
  1120. if (Index>LS) or (Index<=0) or (Size<=0) then
  1121. exit;
  1122. UniqueString (S);
  1123. { (Size+Index) will overflow if Size=MaxInt. }
  1124. if Size>LS-Index then
  1125. Size:=LS-Index+1;
  1126. if Size<=LS-Index then
  1127. begin
  1128. Dec(Index);
  1129. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  1130. end;
  1131. Setlength(s,LS-Size);
  1132. end;
  1133. {$endif FPC_HAS_DELETE_UNICODESTR}
  1134. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1135. {$define FPC_HAS_INSERT_UNICODESTR}
  1136. Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  1137. var
  1138. Temp : UnicodeString;
  1139. LS : SizeInt;
  1140. begin
  1141. If Length(Source)=0 then
  1142. exit;
  1143. if index <= 0 then
  1144. index := 1;
  1145. Ls:=Length(S);
  1146. if index > LS then
  1147. index := LS+1;
  1148. Dec(Index);
  1149. SetLength(Temp,Length(Source)+LS);
  1150. If Index>0 then
  1151. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  1152. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  1153. If (LS-Index)>0 then
  1154. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  1155. S:=Temp;
  1156. end;
  1157. {$endif FPC_HAS_INSERT_UNICODESTR}
  1158. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1159. {$define FPC_HAS_UPCASE_UNICODECHAR}
  1160. Function UpCase(c:UnicodeChar):UnicodeChar;
  1161. var
  1162. s : UnicodeString;
  1163. begin
  1164. s:=c;
  1165. result:=widestringmanager.UpperUnicodeStringProc(s)[1];
  1166. end;
  1167. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1168. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1169. {$define FPC_HAS_UPCASE_UNICODESTR}
  1170. function UpCase(const s : UnicodeString) : UnicodeString;
  1171. begin
  1172. result:=widestringmanager.UpperUnicodeStringProc(s);
  1173. end;
  1174. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1175. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1176. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  1177. Function LowerCase(c:UnicodeChar):UnicodeChar;
  1178. var
  1179. s : UnicodeString;
  1180. begin
  1181. s:=c;
  1182. result:=widestringmanager.LowerUnicodeStringProc(s)[1];
  1183. end;
  1184. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1185. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1186. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  1187. function LowerCase(const s : UnicodeString) : UnicodeString;
  1188. begin
  1189. result:=widestringmanager.LowerUnicodeStringProc(s);
  1190. end;
  1191. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1192. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1193. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1194. Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  1195. begin
  1196. SetLength(S,Len);
  1197. If (Buf<>Nil) and (Len>0) then
  1198. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  1199. end;
  1200. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1201. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1202. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1203. Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  1204. begin
  1205. If (Buf<>Nil) and (Len>0) then
  1206. widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len)
  1207. else
  1208. SetLength(S,Len);
  1209. end;
  1210. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1211. {$ifndef FPUNONE}
  1212. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  1213. Var
  1214. SS: ShortString;
  1215. begin
  1216. fpc_Val_Real_UnicodeStr:=0;
  1217. if length(S)>255 then
  1218. code:=256
  1219. else
  1220. begin
  1221. SS:=ShortString(S);
  1222. Val(SS,fpc_Val_Real_UnicodeStr,code);
  1223. end;
  1224. end;
  1225. {$endif}
  1226. {$ifndef FPC_STR_ENUM_INTERN}
  1227. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  1228. var
  1229. ss: ShortString;
  1230. begin
  1231. if length(s)>255 then
  1232. code:=256
  1233. else
  1234. begin
  1235. ss:=ShortString(s);
  1236. val(ss,fpc_val_enum_unicodestr,code);
  1237. end;
  1238. end;
  1239. {$endif FPC_STR_ENUM_INTERN}
  1240. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  1241. Var
  1242. SS: ShortString;
  1243. begin
  1244. if length(S)>255 then
  1245. begin
  1246. fpc_Val_Currency_UnicodeStr:=0;
  1247. code:=256;
  1248. end
  1249. else
  1250. begin
  1251. SS:=ShortString(S);
  1252. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  1253. end;
  1254. end;
  1255. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  1256. Var
  1257. SS: ShortString;
  1258. begin
  1259. fpc_Val_UInt_UnicodeStr:=0;
  1260. if length(S)>255 then
  1261. code:=256
  1262. else
  1263. begin
  1264. SS:=ShortString(S);
  1265. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  1266. end;
  1267. end;
  1268. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  1269. Var
  1270. SS: ShortString;
  1271. begin
  1272. fpc_Val_SInt_UnicodeStr:=0;
  1273. if length(S)>255 then
  1274. code:=256
  1275. else
  1276. begin
  1277. SS:=ShortString(S);
  1278. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  1279. end;
  1280. end;
  1281. {$ifndef CPU64}
  1282. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1283. Var
  1284. SS: ShortString;
  1285. begin
  1286. fpc_Val_qword_UnicodeStr:=0;
  1287. if length(S)>255 then
  1288. code:=256
  1289. else
  1290. begin
  1291. SS:=ShortString(S);
  1292. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1293. end;
  1294. end;
  1295. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1296. Var
  1297. SS: ShortString;
  1298. begin
  1299. fpc_Val_int64_UnicodeStr:=0;
  1300. if length(S)>255 then
  1301. code:=256
  1302. else
  1303. begin
  1304. SS:=ShortString(S);
  1305. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1306. end;
  1307. end;
  1308. {$endif CPU64}
  1309. {$if defined(CPU16) or defined(CPU8)}
  1310. Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  1311. Var
  1312. SS: ShortString;
  1313. begin
  1314. fpc_Val_longword_UnicodeStr:=0;
  1315. if length(S)>255 then
  1316. code:=256
  1317. else
  1318. begin
  1319. SS:=ShortString(S);
  1320. Val(SS,fpc_Val_longword_UnicodeStr,Code);
  1321. end;
  1322. end;
  1323. Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  1324. Var
  1325. SS: ShortString;
  1326. begin
  1327. fpc_Val_longint_UnicodeStr:=0;
  1328. if length(S)>255 then
  1329. code:=256
  1330. else
  1331. begin
  1332. SS:=ShortString(S);
  1333. Val(SS,fpc_Val_longint_UnicodeStr,Code);
  1334. end;
  1335. end;
  1336. Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  1337. Var
  1338. SS: ShortString;
  1339. begin
  1340. fpc_Val_word_UnicodeStr:=0;
  1341. if length(S)>255 then
  1342. code:=256
  1343. else
  1344. begin
  1345. SS:=ShortString(S);
  1346. Val(SS,fpc_Val_word_UnicodeStr,Code);
  1347. end;
  1348. end;
  1349. Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  1350. Var
  1351. SS: ShortString;
  1352. begin
  1353. fpc_Val_smallint_UnicodeStr:=0;
  1354. if length(S)>255 then
  1355. code:=256
  1356. else
  1357. begin
  1358. SS:=ShortString(S);
  1359. Val(SS,fpc_Val_smallint_UnicodeStr,Code);
  1360. end;
  1361. end;
  1362. {$endif CPU16 or CPU8}
  1363. {$ifndef FPUNONE}
  1364. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1365. var
  1366. ss: shortstring;
  1367. begin
  1368. str_real(len,fr,d,treal_type(rt),ss);
  1369. s:=UnicodeString(ss);
  1370. end;
  1371. {$endif}
  1372. {$ifndef FPC_STR_ENUM_INTERN}
  1373. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1374. var
  1375. ss: ShortString;
  1376. begin
  1377. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1378. s:=UnicodeString(ss);
  1379. end;
  1380. {$endif FPC_STR_ENUM_INTERN}
  1381. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1382. var
  1383. ss: ShortString;
  1384. begin
  1385. fpc_shortstr_bool(b,len,ss);
  1386. s:=UnicodeString(ss);
  1387. end;
  1388. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1389. var
  1390. ss: shortstring;
  1391. begin
  1392. str(c:len:fr,ss);
  1393. s:=UnicodeString(ss);
  1394. end;
  1395. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1396. Var
  1397. SS: ShortString;
  1398. begin
  1399. Str (v:Len,SS);
  1400. S:=UnicodeString(SS);
  1401. end;
  1402. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1403. Var
  1404. SS: ShortString;
  1405. begin
  1406. str(v:Len,SS);
  1407. S:=UnicodeString(SS);
  1408. end;
  1409. {$ifndef CPU64}
  1410. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1411. Var
  1412. SS: ShortString;
  1413. begin
  1414. Str (v:Len,SS);
  1415. S:=UnicodeString(SS);
  1416. end;
  1417. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1418. Var
  1419. SS: ShortString;
  1420. begin
  1421. str(v:Len,SS);
  1422. S:=UnicodeString(SS);
  1423. end;
  1424. {$endif CPU64}
  1425. {$if defined(CPU16) or defined(CPU8)}
  1426. Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  1427. Var
  1428. SS: ShortString;
  1429. begin
  1430. Str (v:Len,SS);
  1431. S:=UnicodeString(SS);
  1432. end;
  1433. Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  1434. Var
  1435. SS: ShortString;
  1436. begin
  1437. str(v:Len,SS);
  1438. S:=UnicodeString(SS);
  1439. end;
  1440. Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  1441. Var
  1442. SS: ShortString;
  1443. begin
  1444. Str (v:Len,SS);
  1445. S:=UnicodeString(SS);
  1446. end;
  1447. Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  1448. Var
  1449. SS: ShortString;
  1450. begin
  1451. str(v:Len,SS);
  1452. S:=UnicodeString(SS);
  1453. end;
  1454. {$endif CPU16 or CPU8}
  1455. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1456. begin
  1457. if assigned(Source) then
  1458. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source))
  1459. else
  1460. Result:=0;
  1461. end;
  1462. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1463. var
  1464. i,j : SizeUInt;
  1465. lw : longword;
  1466. begin
  1467. result:=0;
  1468. if source=nil then
  1469. exit;
  1470. i:=0;
  1471. j:=0;
  1472. if assigned(Dest) then
  1473. begin
  1474. while (i<SourceChars) and (j<MaxDestBytes) do
  1475. begin
  1476. lw:=ord(Source[i]);
  1477. case lw of
  1478. 0..$7f:
  1479. begin
  1480. Dest[j]:=char(lw);
  1481. inc(j);
  1482. end;
  1483. $80..$7ff:
  1484. begin
  1485. if j+1>=MaxDestBytes then
  1486. break;
  1487. Dest[j]:=char($c0 or (lw shr 6));
  1488. Dest[j+1]:=char($80 or (lw and $3f));
  1489. inc(j,2);
  1490. end;
  1491. $800..$d7ff,$e000..$ffff:
  1492. begin
  1493. if j+2>=MaxDestBytes then
  1494. break;
  1495. Dest[j]:=char($e0 or (lw shr 12));
  1496. Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
  1497. Dest[j+2]:=char($80 or (lw and $3f));
  1498. inc(j,3);
  1499. end;
  1500. $d800..$dbff:
  1501. {High Surrogates}
  1502. begin
  1503. if j+3>=MaxDestBytes then
  1504. break;
  1505. if (i+1<sourcechars) and
  1506. (word(Source[i+1]) >= $dc00) and
  1507. (word(Source[i+1]) <= $dfff) then
  1508. begin
  1509. { $d7c0 is ($d800 - ($10000 shr 10)) }
  1510. lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
  1511. Dest[j]:=char($f0 or (lw shr 18));
  1512. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1513. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1514. Dest[j+3]:=char($80 or (lw and $3f));
  1515. inc(j,4);
  1516. inc(i);
  1517. end;
  1518. end;
  1519. end;
  1520. inc(i);
  1521. end;
  1522. if j>SizeUInt(MaxDestBytes-1) then
  1523. j:=MaxDestBytes-1;
  1524. Dest[j]:=#0;
  1525. end
  1526. else
  1527. begin
  1528. while i<SourceChars do
  1529. begin
  1530. case word(Source[i]) of
  1531. $0..$7f:
  1532. inc(j);
  1533. $80..$7ff:
  1534. inc(j,2);
  1535. $800..$d7ff,$e000..$ffff:
  1536. inc(j,3);
  1537. $d800..$dbff:
  1538. begin
  1539. if (i+1<sourcechars) and
  1540. (word(Source[i+1]) >= $dc00) and
  1541. (word(Source[i+1]) <= $dfff) then
  1542. begin
  1543. inc(j,4);
  1544. inc(i);
  1545. end;
  1546. end;
  1547. end;
  1548. inc(i);
  1549. end;
  1550. end;
  1551. result:=j+1;
  1552. end;
  1553. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1554. begin
  1555. if assigned(Source) then
  1556. Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source))
  1557. else
  1558. Result:=0;
  1559. end;
  { Decodes the multi-byte UTF-8 sequence whose lead byte is Source[Index].

    Preconditions: Index<SourceBytes and the lead byte has its high bit set
    (the caller handles US-ASCII bytes itself).

    CharLen receives the number of source bytes consumed (always >=1).
    The result is the decoded code point, or UNICODE_INVALID ('?') when the
    bytes do not form an acceptable sequence. Only a well-formed four byte
    sequence yields a value above $FFFF. A sequence length that the case
    statement does not list (8, i.e. lead byte $FF followed by seven
    continuation bytes) leaves the initial value $FFFF. }
  function Utf8DecodeSequence(Source: PChar; Index, SourceBytes: SizeUInt; out CharLen: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
    const
      UNICODE_INVALID=63;
    var
      TempBYTE: BYTE;
      SeqLen: SizeUInt;
      LookAhead: SizeUInt;
    begin
      { the number of leading 1 bits of the lead byte is the sequence length }
      TempBYTE:=byte(Source[Index]);
      SeqLen:=0;
      while (TempBYTE and $80)<>0 do
        begin
          TempBYTE:=(TempBYTE shl 1) and $FE;
          inc(SeqLen);
        end;
      { The sequence occupies Source[Index..Index+SeqLen-1]; all of it must lie
        inside the buffer. If it does not, fall back to a single (invalid)
        byte so that nothing at or beyond Source[SourceBytes] is read. }
      if Index+SeqLen>SourceBytes then
        SeqLen:=1;
      { every trailing byte must match the 10xxxxxx pattern; the loop bound is
        evaluated once, so shortening SeqLen inside the loop is safe }
      for LookAhead:=1 to SeqLen-1 do
        if (byte(Source[Index+LookAhead]) and $C0)<>$80 then
          begin
            { invalid UTF-8 sequence, fall back to the bytes seen so far }
            SeqLen:=LookAhead;
            break;
          end;
      Result:=$FFFF;
      case SeqLen of
        1:
          { stray continuation byte or truncated sequence }
          Result:=UNICODE_INVALID;
        2:
          begin
            Result:=(byte(Source[Index]) and $1F) shl 6;
            Result:=Result or (byte(Source[Index+1]) and $3F);
            { overlong encoding of a US-ASCII character }
            if Result<=$7F then
              Result:=UNICODE_INVALID;
          end;
        3:
          begin
            Result:=(byte(Source[Index]) and $0F) shl 12;
            Result:=Result or ((byte(Source[Index+1]) and $3F) shl 6);
            Result:=Result or (byte(Source[Index+2]) and $3F);
            { overlong, $FFFE/$FFFF, or a UTF-16 surrogate value }
            if (Result<=$7FF) or (Result>=$FFFE) or
               ((Result>=$D800) and (Result<=$DFFF)) then
              Result:=UNICODE_INVALID;
          end;
        4:
          begin
            Result:=(byte(Source[Index]) and $07) shl 18;
            Result:=Result or ((byte(Source[Index+1]) and $3F) shl 12);
            Result:=Result or ((byte(Source[Index+2]) and $3F) shl 6);
            Result:=Result or (byte(Source[Index+3]) and $3F);
            { overlong or beyond the last Unicode code point }
            if (Result<$10000) or (Result>$10FFFF) then
              Result:=UNICODE_INVALID;
          end;
        5,6,7:
          { such sequences cannot be converted, mask them as invalid }
          Result:=UNICODE_INVALID;
      end;
      CharLen:=SeqLen;
    end;

  { Converts SourceBytes bytes of UTF-8 text at Source to UTF-16.

    When Dest is assigned, at most MaxDestChars code units are written to it;
    no terminating #0 is stored by this routine. When Dest is nil nothing is
    written and only the required number of code units is counted.
    Undecodable input is replaced by '?' (code 63).

    Returns the number of code units produced (or counted) plus one, or 0 when
    Source is nil.

    NOTE(review): when the look-ahead finds a non-continuation byte the bytes
    seen so far are still decoded as a shorter sequence, as the previous
    implementation did; this may need tightening. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
    var
      InputUTF8: SizeUInt;
      IBYTE: BYTE;
      OutputUnicode: SizeUInt;
      CharLen: SizeUint;
      UC: SizeUInt;
    begin
      if not assigned(Source) then
        begin
          result:=0;
          exit;
        end;
      InputUTF8:=0;
      OutputUnicode:=0;
      if Assigned(Dest) then
        begin
          while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
            begin
              IBYTE:=byte(Source[InputUTF8]);
              if (IBYTE and $80)=0 then
                begin
                  { one byte US-ASCII, copied as is (LF is deliberately not
                    expanded to CR/LF: that would outgrow the destination) }
                  Dest[OutputUnicode]:=WideChar(IBYTE);
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  UC:=Utf8DecodeSequence(Source,InputUTF8,SourceBytes,CharLen);
                  inc(InputUTF8,CharLen);
                  if UC>$FFFF then
                    begin
                      { supplementary plane: needs a surrogate pair;
                        only store the pair if there is room for both halves,
                        otherwise the character is skipped }
                      dec(UC,$10000);
                      if OutputUnicode<MaxDestChars-1 then
                        begin
                          Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                          inc(OutputUnicode);
                          Dest[OutputUnicode]:=WideChar((UC and $3ff) + $DC00);
                          inc(OutputUnicode);
                        end;
                    end
                  else
                    begin
                      Dest[OutputUnicode]:=WideChar(UC);
                      inc(OutputUnicode);
                    end;
                end;
            end;
        end
      else
        begin
          { counting pass: same decoding, nothing is stored }
          while InputUTF8<SourceBytes do
            begin
              if (byte(Source[InputUTF8]) and $80)=0 then
                begin
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  UC:=Utf8DecodeSequence(Source,InputUTF8,SourceBytes,CharLen);
                  inc(InputUTF8,CharLen);
                  if UC>$FFFF then
                    { surrogate pair }
                    inc(OutputUnicode,2)
                  else
                    inc(OutputUnicode);
                end;
            end;
        end;
      Result:=OutputUnicode+1;
    end;
  { Encodes a byte string as UTF-8 by first converting it to a UnicodeString
    and then handing that to the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
    begin
      Result:=UTF8Encode(UnicodeString(s));
    end;
  1843. {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1844. {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  { Returns s encoded as UTF-8; an empty input, or a conversion that reports
    no output, yields the empty string. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
    var
      written : SizeInt;
      buf : UTF8String;
    begin
      Result:='';
      if length(s)=0 then
        exit;
      { three bytes per UTF-16 code unit is the size reserved for the output }
      SetLength(buf,3*length(s));
      written:=UnicodeToUtf8(pchar(buf),length(buf)+1,PUnicodeChar(s),length(s));
      if written<=0 then
        exit;
      { the returned count includes the terminating #0 }
      SetLength(buf,written-1);
      Result:=buf;
    end;
  1861. {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1862. {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  1863. {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Returns the UTF-8 data in s decoded to a UnicodeString; an empty input,
    or a conversion that reports no output, yields the empty string. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
    var
      produced : SizeInt;
      buf : UnicodeString;
    begin
      Result:='';
      if length(s)=0 then
        exit;
      { one code unit per input byte is the size reserved for the output }
      SetLength(buf,length(s));
      produced:=Utf8ToUnicode(PUnicodeChar(buf),length(buf)+1,pchar(s),length(s));
      if produced<=0 then
        exit;
      { the returned count is one more than the number of code units }
      SetLength(buf,produced-1);
      Result:=buf;
    end;
  1880. {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Returns s converted to UTF-8; simply forwards to Utf8Encode. }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=Utf8Encode(s);
    end;
  { Decodes the UTF-8 data in s with Utf8Decode and converts the resulting
    UnicodeString back to a byte string. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Result:=RawByteString(Utf8Decode(s));
    end;
  { Converts len UTF-16 code units at p into the zero-terminated UCS4 string
    res. A high surrogate directly followed by a low surrogate becomes one
    code point; any other code unit (including an unpaired surrogate) is
    copied unchanged. }
  procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
    var
      srcidx, dstidx: sizeint;
      cu: longint;
    begin
      { first pass: count the resulting code points }
      dstidx:=0;
      srcidx:=0;
      while srcidx<len do
        begin
          if (p[srcidx]>=#$d800) and (p[srcidx]<=#$dbff) and
             (srcidx+1<len) and
             (p[srcidx+1]>=#$dc00) and (p[srcidx+1]<=#$dfff) then
            inc(srcidx,2)
          else
            inc(srcidx);
          inc(dstidx);
        end;
      { one extra element for the terminating zero }
      SetLength(res,dstidx+1);
      { second pass: do the conversion }
      dstidx:=0;
      srcidx:=0;
      while srcidx<len do
        begin
          cu:=ord(p[srcidx]);
          if (cu>=$d800) and (cu<=$dbff) and
             (srcidx+1<len) and
             (p[srcidx+1]>=#$dc00) and (p[srcidx+1]<=#$dfff) then
            begin
              { $d7c0 is $d800 - ($10000 shr 10) }
              res[dstidx]:=(UCS4Char(cu-$d7c0) shl 10)+(UCS4Char(p[srcidx+1]) xor $dc00);
              inc(srcidx,2);
            end
          else
            begin
              { BMP character or invalid surrogate pair: copy as is }
              res[dstidx]:=cu;
              inc(srcidx);
            end;
          inc(dstidx);
        end;
      res[dstidx]:=0;
    end;
  1934. {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1935. {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  { Returns s as a zero-terminated UCS4 string; the work is done by
    UCS4Encode. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
    begin
      UCS4Encode(PWideChar(s),Length(s),Result);
    end;
  1940. {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1941. {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  1942. {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  { Returns s as a zero-terminated UCS4 string; the work is done by
    UCS4Encode. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
    begin
      UCS4Encode(PWideChar(s),Length(s),Result);
    end;
  1947. {$endif FPC_HAS_WIDESTR_TO_UCS4STRING}
  1948. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1949. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  { Writes the UTF-16 form of s to dest. dest must point to a previously
    allocated wide/unicodestring that is large enough; no terminator is
    written. Code points above $ffff up to $10ffff become a surrogate pair,
    anything larger becomes '?'. The last element of s is the explicit
    terminating #0 and is not converted. }
  procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
    var
      srcidx, outpos: sizeint;
      cp: UCS4Char;
    begin
      outpos:=0;
      for srcidx:=0 to length(s)-2 do
        begin
          cp:=s[srcidx];
          if cp<=$ffff then
            begin
              dest[outpos]:=widechar(cp);
              inc(outpos);
            end
          else if dword(cp)<=$10ffff then
            begin
              dest[outpos]:=widechar(cp shr 10 + $d7c0);
              { subtracting $10000 doesn't change low 10 bits }
              dest[outpos+1]:=widechar(cp and $3ff + $dc00);
              inc(outpos,2);
            end
          else
            begin
              { invalid code point }
              dest[outpos]:='?';
              inc(outpos);
            end;
        end;
    end;
  { Converts the zero-terminated UCS4 string s to a UnicodeString: the result
    length is computed first, then UCS4Decode fills the buffer. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      { the last element is the terminating #0 and is skipped }
      for idx:=0 to length(s)-2 do
        if (s[idx]>$ffff) and (s[idx]<=$10ffff) then
          { needs a surrogate pair }
          inc(units,2)
        else
          inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  { Converts the zero-terminated UCS4 string s to a WideString: the result
    length is computed first, then UCS4Decode fills the buffer. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      { the last element is the terminating #0 and is skipped }
      for idx:=0 to length(s)-2 do
        if (s[idx]>$ffff) and (s[idx]<=$10ffff) then
          { needs a surrogate pair }
          inc(units,2)
        else
          inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  1995. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1996. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  const
    { messages printed by unimplementedunicodestring }
    SNoUnicodestrings = 'This binary has no string conversion support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';

  { Reports that no unicodestring manager is installed: on console
    applications (when console I/O is compiled in) both messages are written
    to StdErr, then runtime error 233 is raised for the caller's address and
    frame. }
  procedure unimplementedunicodestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      if IsConsole then
        begin
          Writeln(StdErr,SNoUnicodestrings);
          Writeln(StdErr,SRecompileWithUnicodestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorAddrFrameInd(233,get_pc_addr,get_frame);
    end;
  { Returns the ElementSize field stored in the header of S, or
    SizeOf(UnicodeChar) for an empty (nil) string. }
  function StringElementSize(const S: UnicodeString): Word; overload;
    begin
      Result:=SizeOf(UnicodeChar);
      if Pointer(S)<>nil then
        { the header record is located UnicodeFirstOff bytes before the data }
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
    end;
  { Returns the Ref field stored in the header of S, or 0 for an empty (nil)
    string. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
    begin
      Result:=0;
      if Pointer(S)<>nil then
        { the header record is located UnicodeFirstOff bytes before the data }
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
    end;
  { Returns the code page of S. With FPC_HAS_CPSTRING the CodePage field of a
    non-empty string's header is used; in every other case the result is
    DefaultUnicodeCodePage. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
    begin
      Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
      if Pointer(S)<>nil then
        { the header record is located UnicodeFirstOff bytes before the data }
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
    end;
  { Placeholder implementations installed by initunicodestringmanager.
    Each one only calls unimplementedunicodestring and never assigns a
    function result, which is why warnings are switched off around them. }
  {$warnings off}
  { placeholder for upper/lower casing of a UnicodeString }
  function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring;
    end;

  { placeholder for comparing two UnicodeStrings }
  function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
    begin
      unimplementedunicodestring;
    end;

  { placeholder for upper/lower casing of a WideString }
  function StubWideCase(const s: WideString): WideString;
    begin
      unimplementedunicodestring;
    end;

  { placeholder for comparing two WideStrings }
  function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
    begin
      unimplementedunicodestring;
    end;
  {$warnings on}
  { Fills widestringmanager with the built-in handlers: the default move
    routines for ansi<->wide/unicode conversion, the stubs for case
    conversion and comparison, and the default character length / code page
    helpers. The conversion and case entries are only set when
    HAS_WIDESTRINGMANAGER is not defined. }
  procedure initunicodestringmanager;
    begin
      with widestringmanager do
        begin
  {$ifndef HAS_WIDESTRINGMANAGER}
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
          Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
          Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
          Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
          UpperWideStringProc:=@StubWideCase;
          LowerWideStringProc:=@StubWideCase;
          Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
          Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
          UpperUnicodeStringProc:=@StubUnicodeCase;
          LowerUnicodeStringProc:=@StubUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
          CompareWideStringProc:=@StubCompareWideString;
          // CompareTextWideStringProc:=@StubCompareWideString;
          CompareUnicodeStringProc:=@StubCompareUnicodeString;
          CharLengthPCharProc:=@DefaultCharLengthPChar;
          CodePointLengthProc:=@DefaultCodePointLength;
          GetStandardCodePageProc:=@DefaultGetStandardCodePage;
        end;
    end;
  2075. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  2076. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2077. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  { Converts Str to a byte string in DefaultFileSystemCodePage using the
    Unicode2AnsiMoveProc handler of the installed widestring manager. }
  function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
    begin
      widestringmanager.Unicode2AnsiMoveProc(
        punicodechar(Str),
        Result,
        DefaultFileSystemCodePage,
        Length(Str));
    end;
  2083. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2084. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2085. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Converts the characters in arr up to (not including) the first #0 to a
    byte string in DefaultFileSystemCodePage.

    The search for the terminator is limited to the bounds of arr: an array
    without a terminating #0 is converted in full instead of being scanned
    past its end, and an empty array yields the empty string. }
  function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
    var
      len: SizeInt;
    begin
      if high(arr)<0 then
        begin
          { no element whose address could be taken }
          Result:='';
          exit;
        end;
      len:=0;
      while (len<=high(arr)) and (arr[len]<>#0) do
        inc(len);
      widestringmanager.Unicode2AnsiMoveProc(@arr[0],Result,
        DefaultFileSystemCodePage,len);
    end;
  2091. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Returns a copy of Str whose code page has been set to
    DefaultFileSystemCodePage via SetCodePage with the Convert argument set
    to True. }
  function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
    begin
      Result:=Str;
      SetCodePage(Result,DefaultFileSystemCodePage,True);
    end;
  { Delphi compatibility: the bytes of S are always interpreted as UTF-8,
    whatever code page the string carries. Forwards to UTF8Decode. }
  function UTF8ToString(const S: RawByteString): UnicodeString; inline;
    begin
      Result:=UTF8Decode(S);
    end;
  { Decodes the UTF-8 data held in a ShortString: the contents are first
    copied into a RawByteString, which is then passed to UTF8Decode. }
  function UTF8ToString(const S: ShortString): UnicodeString;
    var
      raw: RawByteString;
    begin
      raw:=S;
      Result:=UTF8Decode(raw);
    end;
  { Decodes the zero-terminated UTF-8 string S: its bytes are copied into a
    RawByteString, which is then handed to the RawByteString overload.

    Count is a SizeInt, the type used for lengths throughout this file, so
    that the length is not squeezed into the narrower Integer type. }
  function UTF8ToString(const S: PAnsiChar): UnicodeString;
    var
      rs: RawByteString;
      Count: SizeInt;
    begin
      Count:=length(S);
      SetLength(rs,Count);
      if Count>0 then
        fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
      Result:=UTF8ToString(rs);
    end;
  2121. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2122. are as well }
  2123. {$ifndef CPUJVM}
  { Decodes the UTF-8 data in the character array S: all Length(S) elements
    are copied into a RawByteString, which is then handed to the
    RawByteString overload.

    Count is a SizeInt, the type used for lengths throughout this file, so
    that the length is not squeezed into the narrower Integer type. }
  function UTF8ToString(const S: array of AnsiChar): UnicodeString;
    var
      rs: RawByteString;
      Count: SizeInt;
    begin
      Count:=Length(S);
      SetLength(rs,Count);
      if Count>0 then
        fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
      Result:=UTF8ToString(rs);
    end;
  { Decodes the UTF-8 data in the byte array S: all Length(S) elements are
    copied into a RawByteString, which is then handed to the RawByteString
    overload.

    Count is a SizeInt, the type used for lengths throughout this file, so
    that the length is not squeezed into the narrower Integer type. }
  function UTF8ToString(const S: array of Byte): UnicodeString;
    var
      rs: RawByteString;
      Count: SizeInt;
    begin
      Count:=Length(S);
      SetLength(rs,Count);
      if Count>0 then
        fpc_pchar_ansistr_intern_charmove(pchar(@S),Low(S),rs,0,Count);
      Result:=UTF8ToString(rs);
    end;
  2146. {$endif not CPUJVM}