ustrings.inc 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UnicodeStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  13. {$define FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to (S = SizeOf(SizeInt)) :
  19. @-2*S : SizeInt for reference count;
  20. @-S : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  Type
    PUnicodeRec = ^TUnicodeRec;
    { Header record stored directly in front of the character data of a
      UnicodeString. The field order defines the memory layout, so it must
      not be rearranged. }
    TUnicodeRec = Record
      CodePage    : TSystemCodePage; { code page tag of the string data }
      ElementSize : Word;            { size of one string element in bytes }
  {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
  {$endif CPU64}
      Ref         : SizeInt;         { reference count; negative marks a constant string }
      Len         : SizeInt;         { length in UnicodeChars (not bytes) }
    end;

  Const
    { Byte offset from the start of the header to the first character. }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  42. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
  43. {
  44. Default UnicodeChar <-> Char conversion only converts code units below 256
  45. (copied 1:1); all other UnicodeChars are translated to '?'.
  46. These routines can be overridden for the Current Locale
  47. }
  48. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  49. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  { Default UnicodeChar -> AnsiChar conversion.
    Code units below 256 are copied unchanged, every other code unit is
    replaced by '?'.
      source : first UnicodeChar to convert
      dest   : receives the result; resized to len characters
      cp     : code page that dest is tagged with via SetCodePage
      len    : number of UnicodeChars to convert }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  var
    remaining : SizeInt;
    outp : PAnsiChar;
  begin
    SetLength(dest,len);
    { an empty result has no buffer to tag or to fill }
    if pointer(dest)=nil then
      exit;
    SetCodePage(dest,cp,false);
    outp:=pointer(dest); { SetLength guarantees that dest is unique }
    remaining:=len;
    while remaining>0 do
      begin
        if word(source^)>=256 then
          outp^:='?'
        else
          outp^:=char(word(source^));
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  70. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  71. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  72. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  { Default AnsiChar -> UnicodeChar conversion.
    Every source byte is widened to the UnicodeChar with the same ordinal
    value; the cp argument is not consulted by this implementation.
      source : first byte to convert
      dest   : receives the result; resized to len characters
      len    : number of bytes to convert }
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  var
    remaining : SizeInt;
    outp : PUnicodeChar;
  begin
    SetLength(dest,len);
    outp:=pointer(dest); { SetLength guarantees that dest is unique }
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=unicodechar(byte(source^));
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  87. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  88. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  { Default character count of a zero terminated string: simply the value
    reported by Length(Str). }
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  begin
    Result:=Length(Str);
  end;
  { Default code point length: 0 when Str starts with the terminating #0,
    otherwise 1. MaxLookAead is not used by this implementation. }
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  begin
    if Str[0]=#0 then
      Result:=0
    else
      Result:=1;
  end;
  100. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  { Maps a standard code page selector to a concrete code page:
    scpFileSystemSingleByte yields DefaultFileSystemCodePage, every other
    selector yields DefaultSystemCodePage. }
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  begin
    { don't raise an exception here. We need this for text file handling }
    if stdcp=scpFileSystemSingleByte then
      { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
        without a fully functional widestring manager that will probably
        cause more problems than it solves }
      Result:=DefaultFileSystemCodePage
    else
      Result:=DefaultSystemCodePage;
  end;
  { Copies the currently installed string manager into Manager. }
  procedure GetUnicodeStringManager(var Manager : TUnicodeStringManager);
  begin
    Manager:=WideStringManager;
  end;
  { Installs New as the active string manager and hands the previously
    installed one back in Old. Old is written before New is read. }
  procedure SetUnicodeStringManager(const New : TUnicodeStringManager; var Old : TUnicodeStringManager);
  begin
    Old:=WideStringManager;
    WideStringManager:=New;
  end;
  { Installs New as the active string manager; the previous one is discarded. }
  procedure SetUnicodeStringManager(const New : TUnicodeStringManager);
  begin
    WideStringManager:=New;
  end;
  { Copies the currently installed string manager into Manager. Reads the
    same global variable as GetUnicodeStringManager. }
  procedure GetWideStringManager(var Manager : TUnicodeStringManager);
  begin
    Manager:=WideStringManager;
  end;
  { Installs New as the active string manager and hands the previously
    installed one back in Old. Old is written before New is read. }
  procedure SetWideStringManager(const New : TUnicodeStringManager; var Old : TUnicodeStringManager);
  begin
    Old:=WideStringManager;
    WideStringManager:=New;
  end;
  { Installs New as the active string manager; the previous one is discarded. }
  procedure SetWideStringManager(const New : TUnicodeStringManager);
  begin
    WideStringManager:=New;
  end;
  138. {****************************************************************************
  139. Internal functions, not in interface.
  140. ****************************************************************************}
  { Reports run time error 204 through the RTL error handler, passing the
    current code address and stack frame. }
  procedure UnicodeStringError;
  begin
    HandleErrorAddrFrameInd(204,get_pc_addr,get_frame);
  end;
  145. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  146. {$define FPC_HAS_NEW_UNICODESTRING}
  function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates heap memory for a UnicodeString header plus Len characters
    plus one terminating character.
    The header is initialised with length Len, reference count 1, the
    default unicode code page and the element size of UnicodeChar; the
    first character is set to #0.
    Returns a pointer to the first character (just behind the header).
    When GetMem yields nil, UnicodeStringError is called and nil is returned.
  }
  var
    Mem : Pointer;
  begin
    GetMem(Mem,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
    if Mem=nil then
      UnicodeStringError
    else
      begin
        PUnicodeRec(Mem)^.Len:=Len;                         { initial length }
        PUnicodeRec(Mem)^.Ref:=1;                           { initial reference count }
        PUnicodeRec(Mem)^.CodePage:=DefaultUnicodeCodePage;
        PUnicodeRec(Mem)^.ElementSize:=SizeOf(UnicodeChar);
        inc(Mem,UnicodeFirstOff);                           { step over the header }
        PUnicodeChar(Mem)^:=#0;                             { terminating #0 }
      end;
    NewUnicodeString:=Mem;
  end;
  169. {$endif FPC_HAS_NEW_UNICODESTRING}
  170. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  171. {$define FPC_HAS_UNICODESTR_DECR_REF}
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Releases one reference to the unicodestring S points to and sets S to nil.
    Constant strings (negative reference count) are never freed; any other
    string is freed once its reference count drops to zero.
  }
  var
    Header : PUnicodeRec;
  begin
    if S<>nil then
      begin
        Header:=PUnicodeRec(S-UnicodeFirstOff);
        { the caller's variable is cleared before the count is touched }
        S:=nil;
        if Header^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(Header^.Ref) then
            FreeMem(Header);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  194. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  195. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  196. {$define FPC_HAS_UNICODESTR_INCR_REF}
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S points to.
    Nil strings and constant strings (negative reference count) are left
    untouched.
  }
  begin
    if S<>nil then
      if PUnicodeRec(S-UnicodeFirstOff)^.Ref>=0 then
        inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  208. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  209. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  210. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts the UnicodeString S2 to a ShortString in res, using the
    installed string manager and DefaultSystemCodePage.
    At most high(res) source characters are converted.
  }
  var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    { never convert more characters than res can hold }
    if CharCount>high(res) then
      CharCount:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),Converted,DefaultSystemCodePage,CharCount);
    res:=Converted;
  end;
  229. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  230. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  231. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts the ShortString S2 to a UnicodeString, using the installed
    string manager and DefaultSystemCodePage. An empty S2 yields ''.
  }
  begin
    Result:='';
    if Length(S2)>0 then
      widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,Result,Length(S2));
  end;
  244. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  245. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  246. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts the UnicodeString S2 to an AnsiString in code page cp, using
    the installed string manager. Without FPC_HAS_CPSTRING the code page is
    DefaultSystemCodePage. An empty S2 yields ''.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    { resolve placeholder code page values to a concrete code page }
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),Result,cp,CharCount);
  end;
  268. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  269. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  270. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts the AnsiString S2 to a UnicodeString, using the installed
    string manager and the code page S2 is tagged with.
    An empty S2 yields ''.
  }
  var
    ByteCount : SizeInt;
    SourceCP : TSystemCodePage;
  begin
    Result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    { resolve placeholder code page values to a concrete code page }
    SourceCP:=TranslatePlaceholderCP(StringCodePage(S2));
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SourceCP,Result,ByteCount);
  end;
  287. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  288. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  289. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  { Copies the characters of the UnicodeString S2 unchanged into a new
    WideString of the same length. }
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  295. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  296. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  297. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  { Copies the characters of the WideString S2 unchanged into a new
    UnicodeString of the same length. }
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  303. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  304. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  305. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  { Builds a UnicodeString from the zero terminated wide character
    sequence p points to. A nil pointer yields ''. }
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  var
    CharCount : SizeInt;
  begin
    Result:='';
    if p<>nil then
      begin
        { position of the first zero word = number of characters }
        CharCount:=IndexWord(p^, -1, 0);
        SetLength(Result,CharCount);
        if CharCount>0 then
          Move(p^,PUnicodeChar(Pointer(Result))^,CharCount*sizeof(UnicodeChar));
      end;
  end;
  318. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  319. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  320. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  { Converts the zero terminated wide character sequence p points to into
    an AnsiString in code page cp, using the installed string manager.
    Without FPC_HAS_CPSTRING the code page is DefaultSystemCodePage.
    A nil pointer or an empty sequence yields ''. }
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    if p<>nil then
      begin
        { position of the first zero word = number of characters }
        CharCount:=IndexWord(p^, -1, 0);
        if CharCount>0 then
          begin
            cp:=TranslatePlaceholderCP(cp);
            widestringmanager.Wide2AnsiMoveProc(p,Result,cp,CharCount);
          end;
      end;
  end;
  341. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  342. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  343. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  { Converts the zero terminated wide character sequence p points to into
    the ShortString res, using the installed string manager and
    DefaultSystemCodePage. A nil pointer or an empty sequence yields ''. }
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        { position of the first zero word = number of characters }
        CharCount:=IndexWord(p^, high(PtrInt), 0);
        if CharCount>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,Converted,DefaultSystemCodePage,CharCount);
            res:=Converted;
          end;
      end;
  end;
  359. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  360. {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  361. {$define FPC_UNICODESTR_ASSIGN}
  362. { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Implements S1:=S2 on the raw string pointers while maintaining the
    reference counts.
  }
  var
    SrcHeader : PUnicodeRec;
  begin
    { take the new reference first, so that assigning a string to itself
      cannot free it in the decrement below }
    if S2<>nil then
      begin
        SrcHeader:=PUnicodeRec(S2-UnicodeFirstOff);
        if SrcHeader^.Ref>0 then
          inclocked(SrcHeader^.Ref);
      end;
    { release the string S1 referred to so far }
    fpc_unicodestr_decr_ref(S1);
    S1:=S2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  377. {$endif FPC_UNICODESTR_ASSIGN}
  378. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  379. {$define FPC_HAS_UNICODESTR_CONCAT}
  { Stores S1+S2 in DestS.
    DestS may be the same string as S1 and/or S2; those cases are handled
    by growing DestS in place instead of building a fresh string. }
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  var
    Len1,Len2 : SizeInt;
    SelfAppend : boolean;
  begin
    { with an empty operand the result is simply the other operand }
    if S1='' then
      begin
        DestS:=S2;
        exit;
      end;
    if S2='' then
      begin
        DestS:=S1;
        exit;
      end;
    { lengths are captured before DestS is resized, because DestS may alias
      S1 or S2 }
    Len1:=Length(S1);
    Len2:=Length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS:=DestS+S2 : append behind the existing contents }
        SelfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len2+Len1);
        if SelfAppend then
          { DestS:=DestS+DestS : duplicate the first half, without the
            terminator }
          Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2)*sizeof(UnicodeChar))
        else
          Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS:=S1+DestS : shift the old contents up, then put S1 in front }
        SetLength(DestS,Len2+Len1);
        Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
      end
    else
      begin
        { no aliasing: start from an empty string and copy both parts;
          the second copy includes the terminating #0 of S2 }
        DestS:='';
        SetLength(DestS,Len2+Len1);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  422. {$endif FPC_HAS_UNICODESTR_CONCAT}
  423. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  424. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  { Stores the concatenation of all elements of sarr in DestS.
    When DestS is the same string as the first element only, the remaining
    elements are appended to it; when DestS also occurs further on in sarr,
    an extra reference keeps the original alive while DestS is rebuilt. }
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  var
    i : Longint;
    Src,Dst : pointer;
    PartLen,TotalLen : SizeInt;
    FirstIdx : longint;
    KeepAlive : pointer;
    PrefixLen : SizeInt;
  begin
    { NOTE(review): a single-element array yields '' rather than that
      element - confirm that callers never pass exactly one element }
    if high(sarr)=0 then
      begin
        DestS:='';
        exit;
      end;
    KeepAlive:=nil;
    FirstIdx:=low(sarr);
    { DestS is the first operand: its contents can stay where they are }
    if Pointer(DestS)=Pointer(sarr[FirstIdx]) then
      inc(FirstIdx);
    { Check for another reuse, then we can't use the append optimization }
    for i:=FirstIdx to high(sarr) do
      if Pointer(DestS)=Pointer(sarr[i]) then
        begin
          { DestS is used somewhere in the middle of the expression, so the
            original string must still exist after DestS is emptied or
            modified. This trick only works with reference counted strings.
            Therefore this optimization is disabled for WINLIKEUNICODESTRING }
          KeepAlive:=pointer(DestS);
          fpc_UnicodeStr_Incr_Ref(KeepAlive);
          FirstIdx:=low(sarr);
          break;
        end;
    { Start with an empty DestS if the first array element has to be
      copied as well }
    if FirstIdx=low(sarr) then
      DestS:='';
    PrefixLen:=length(DestS);
    { Calculate the size of the result so a single SetLength call suffices }
    TotalLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(TotalLen,length(sarr[i]));
    SetLength(DestS,TotalLen);
    { Append all strings, except the one that is already in DestS }
    Dst:=Pointer(DestS)+PrefixLen*sizeof(UnicodeChar);
    for i:=FirstIdx to high(sarr) do
      begin
        Src:=pointer(sarr[i]);
        if assigned(Src) then
          begin
            PartLen:=length(unicodestring(Src));
            { copies the terminating #0 too; the next part overwrites it }
            Move(Src^,Dst^,(PartLen+1)*sizeof(UnicodeChar));
            inc(Dst,PartLen*sizeof(UnicodeChar));
          end;
      end;
    { drop the extra reference again (no-op when KeepAlive is nil) }
    fpc_UnicodeStr_Decr_Ref(KeepAlive);
  end;
  486. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  487. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  488. {$define FPC_HAS_CHAR_TO_UCHAR}
  { Converts the Char c to a UnicodeChar, using the installed string
    manager and DefaultSystemCodePage.
    Returns the first character of the conversion result. If the manager
    produces an empty string, '?' is returned instead of indexing into the
    empty result; this mirrors the fallback used by fpc_UChar_To_Char. }
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  var
    Converted : unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,Converted,1);
    if length(Converted)>=1 then
      fpc_Char_To_UChar:=Converted[1]
    else
      fpc_Char_To_UChar:='?';
  end;
  496. {$endif FPC_HAS_CHAR_TO_UCHAR}
  497. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  498. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Converts the single Char c to a UnicodeString, using the installed
    string manager and DefaultSystemCodePage.
  }
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,Result,1);
  end;
  506. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  507. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  508. {$define FPC_HAS_UCHAR_TO_CHAR}
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts the UnicodeChar c to a Char, using the installed string
    manager and DefaultSystemCodePage.
    Returns '?' unless the conversion result is exactly one char long.
  }
  var
    Converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    if length(Converted)<>1 then
      fpc_UChar_To_Char:='?'
    else
      fpc_UChar_To_Char:=Converted[1];
  end;
  522. {$endif FPC_HAS_UCHAR_TO_CHAR}
  523. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  524. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  {$ifdef VER2_6}
  procedure fpc_UChar_To_ShortStr(out result : shortstring;const c : WideChar); compilerproc;
  {$else}
  function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  {$endif}
  {
    Converts the WideChar c to a ShortString, using the installed string
    manager and DefaultSystemCodePage.
    Under VER2_6 the result is delivered through an out parameter.
  }
  var
    Converted : ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,Converted,DefaultSystemCodePage,1);
    result:=Converted;
  end;
  539. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  540. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  541. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Returns a UnicodeString of length 1 that holds c.
  }
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  550. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  551. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  552. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts the UnicodeChar c to an AnsiString in code page cp, using the
    installed string manager. Without FPC_HAS_CPSTRING the code page is
    DefaultSystemCodePage.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    { resolve placeholder code page values to a concrete code page }
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  568. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  569. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  570. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  { Converts the zero terminated string p points to into a UnicodeString,
    using the installed string manager and DefaultSystemCodePage.
    A nil pointer or an empty string yields ''. }
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  var
    ByteCount : SizeInt;
  begin
    if assigned(p) then
      if p[0]<>#0 then
        begin
          { position of the terminating #0 = number of chars to convert }
          ByteCount:=IndexChar(p^,-1,#0);
          widestringmanager.Ansi2UnicodeMoveProc(p,DefaultSystemCodePage,Result,ByteCount);
          exit;
        end;
    Result:='';
  end;
  583. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  584. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  585. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  { Converts the char array arr to a UnicodeString, using the installed
    string manager and DefaultSystemCodePage.
    With zerobased set, conversion stops in front of the first #0 in arr;
    otherwise, or when arr contains no #0, the whole array is converted. }
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  var
    Count : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        { a leading #0 means an empty string: skip the conversion call }
        if arr[0]=#0 then
          begin
            Result:='';
            exit;
          end;
        Count:=IndexChar(arr,high(arr)+1,#0);
        if Count=-1 then
          Count:=high(arr)+1;
      end;
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,Result,Count);
  end;
  605. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  606. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  607. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  { Copies the widechar array arr into a UnicodeString.
    With zerobased set, copying stops in front of the first zero element;
    otherwise, or when arr contains no zero, the whole array is copied. }
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  var
    Count,ZeroPos : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        ZeroPos:=IndexWord(arr,high(arr)+1,0);
        if ZeroPos<>-1 then
          Count:=ZeroPos;
      end;
    SetLength(Result,Count);
    Move(arr[0], Pointer(Result)^,Count*sizeof(WideChar));
  end;
  623. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  624. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  625. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  626. { due to their names, the following procedures should be in wstrings.inc,
  627. however, the compiler generates code using this functions on all platforms }
  { Converts the widechar array arr to the ShortString res, using the
    installed string manager and DefaultSystemCodePage.
    At most high(res) elements are considered; with zerobased set,
    conversion stops in front of the first zero element within that range. }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  var
    Limit : longint;
    ZeroPos : ptrint;
    CharCount : byte;
    Converted : ansistring;
  begin
    { clamp the element count to what res can hold }
    Limit:=high(arr)+1;
    if Limit>=high(res)+1 then
      Limit:=high(res)
    else if Limit<0 then
      Limit:=0;
    CharCount:=Limit;
    if zerobased then
      begin
        ZeroPos:=IndexWord(arr[0],Limit,0);
        if ZeroPos>=0 then
          CharCount:=ZeroPos;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),Converted,DefaultSystemCodePage,CharCount);
    res:=Converted;
  end;
  653. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  654. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  655. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts a widechar array to an ansistring in code page cp (after
    TranslatePlaceholderCP). Without FPC_HAS_CPSTRING the code page is
    DefaultSystemCodePage. When zerobased is true the input is cut at the
    first zero word; an empty input yields ''.
  }
  var
    Count,
    NulPos : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Count:=high(arr)+1;
    if zerobased then
      begin
        NulPos:=IndexWord(arr,Count,0);
        if NulPos<>-1 then
          Count:=NulPos;
      end;
    if Count<=0 then
      begin
        result:='';
        exit;
      end;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),RawByteString(result),cp,Count);
  end;
  682. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  683. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  684. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies a widechar array into a WideString without any conversion.
    When zerobased is true the copy stops before the first zero word (if any);
    otherwise all high(arr)+1 elements are copied.
  }
  var
    Count,
    NulPos : SizeInt;
  begin
    Count:=high(arr)+1;
    if zerobased then
      begin
        NulPos:=IndexWord(arr,Count,0);
        { -1: no terminator inside the array, keep the full length }
        if NulPos<>-1 then
          Count:=NulPos;
      end;
    SetLength(result,Count);
    Move(arr[0],Pointer(result)^,Count*sizeof(WideChar));
  end;
  700. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  701. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  702. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in res.
    The converted text is truncated to length(res); the unused tail of res
    is filled with #0.
  }
  var
    SrcLen,
    CopyLen,
    Capacity : SizeInt;
    Converted : ansistring;
  begin
    SrcLen:=length(src);
    { only touch src[1] when the string is not nil }
    if SrcLen>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),Converted,DefaultSystemCodePage,SrcLen);
    Capacity:=length(res);
    CopyLen:=length(Converted);
    if CopyLen>Capacity then
      CopyLen:=Capacity;
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyLen);
    fillchar(res[CopyLen],Capacity-CopyLen,0);
  {$pop}
  end;
  {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  722. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  723. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src (interpreted in its own code page, after
    TranslatePlaceholderCP) to wide characters and stores them in res.
    The result is truncated to length(res); the unused tail of res is
    zero-filled.
  }
  var
    SrcLen,
    CopyLen,
    Capacity : SizeInt;
    Converted : widestring;
  begin
    SrcLen:=length(src);
    { only touch src[1] when the string is not nil }
    if SrcLen>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),Converted,SrcLen);
    Capacity:=length(res);
    CopyLen:=length(Converted);
    if CopyLen>Capacity then
      CopyLen:=Capacity;
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyLen*sizeof(widechar));
    fillchar(res[CopyLen],(Capacity-CopyLen)*SizeOf(WideChar),0);
  {$pop}
  end;
  742. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  743. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  744. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to wide characters and stores
    them in res. The result is truncated to length(res); the unused tail of
    res is zero-filled.
  }
  var
    SrcLen,
    CopyLen,
    Capacity : longint;
    Converted : widestring;
  begin
    SrcLen:=length(src);
    { do not access char 1 of an empty shortstring }
    if SrcLen>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,Converted,SrcLen);
    Capacity:=length(res);
    CopyLen:=length(Converted);
    if CopyLen>Capacity then
      CopyLen:=Capacity;
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyLen*sizeof(widechar));
    fillchar(res[CopyLen],(Capacity-CopyLen)*SizeOf(WideChar),0);
  {$pop}
  end;
  763. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  764. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  765. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies src into res without conversion. At most length(res) characters
    are copied; the unused tail of res is zero-filled.
  }
  var
    CopyLen,
    Capacity : SizeInt;
  begin
    Capacity:=length(res);
    CopyLen:=length(src);
    if CopyLen>Capacity then
      CopyLen:=Capacity;
  {$push}
  {$r-}
    { src[1] must not be accessed when the string is nil }
    if CopyLen>0 then
      move(src[1],res[0],CopyLen*SizeOf(WideChar));
    fillchar(res[CopyLen],(Capacity-CopyLen)*SizeOf(WideChar),0);
  {$pop}
  end;
  781. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  782. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  783. {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings:
     <0 if S1<S2
      0 if S1=S2
     >0 if S1>S2
    The common prefix is compared word by word; if it is equal, the
    difference of the lengths decides.
  }
  Var
    Len1,Len2,Common : SizeInt;
  begin
    { same string data (or both nil): equal by definition }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1<Len2 then
      Common:=Len1
    else
      Common:=Len2;
    result:=CompareWord(S1[1],S2[1],Common);
    if result=0 then
      result:=Len1-Len2;
  end;
  809. {$endif FPC_HAS_UNICODESTR_COMPARE}
  810. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  811. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
      0  if S1=S2
     <>0 if S1<>S2
    Strings of different length are reported as -1 without looking at
    their contents.
  }
  Var
    Len : SizeInt;
  begin
    result:=0;
    { same string data (or both nil): equal by definition }
    if pointer(S1)=pointer(S2) then
      exit;
    Len:=Length(S1);
    if Len=Length(S2) then
      result:=CompareWord(S1[1],S2[1],Len)
    else
      result:=-1;
  end;
  830. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  831. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  832. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Index check for a unicodestring whose data pointer is p.
    Reports error 201 through HandleErrorAddrFrameInd when p is nil or
    index lies outside 1..len, where len is read from the string header.
  }
  begin
    { relies on short-circuit evaluation: the header is only read when p<>nil }
    if (p<>nil) and (index>=1) and (index<=PUnicodeRec(p-UnicodeFirstOff)^.len) then
      exit;
    HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  838. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  839. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  840. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of string S to l.
    Makes sure S is unique, and contains enough room.
    For l<=0 the string reference is dropped through
    fpc_unicodestr_decr_ref.
  }
  Var
    Block : Pointer;
    CopyChars : SizeInt;
    StoredLen,CurSize,NeededSize : SizeUInt;
  begin
    { value written to the Len header field (doubled under VER2_6) }
    StoredLen:=l;
  {$IFDEF VER2_6}
    StoredLen:=StoredLen*2;
  {$ENDIF}
    if l<=0 then
      begin
        { length=0, deallocate the string }
        fpc_unicodestr_decr_ref(Pointer(S));
        exit;
      end;
    if Pointer(S)=nil then
      { Need a complete new string...}
      Pointer(S):=NewUnicodeString(StoredLen)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: resize in place, but only when the block is too small
          or (being larger than 32 bytes) at least twice as big as needed }
        Block:=Pointer(S)-UnicodeFirstOff;
        CurSize:=MemSize(Block);
        NeededSize:=SizeUInt(l*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
        if (NeededSize>CurSize) or ((CurSize>32) and (NeededSize<=(CurSize div 2))) then
          begin
            reallocmem(Block,NeededSize);
            Pointer(S):=Block+UnicodeFirstOff;
          end;
      end
    else
      begin
        { shared string: build a private copy of the requested size }
        Block:=NewUnicodeString(StoredLen);
        if Length(S)>0 then
          begin
            { copy min(l, old length + terminating null) characters }
            CopyChars:=succ(length(S));
            if l<CopyChars then
              CopyChars:=l;
            Move(Pointer(S)^,Block^,CopyChars*Sizeof(UnicodeChar));
          end;
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Block;
      end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=StoredLen;
  end;
  897. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  898. {*****************************************************************************
  899. Public functions, In interface.
  900. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  { Returns the null-terminated UTF-16 text at S as a UnicodeString. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(UnicodeString(S));
    result:=UnicodeCharLenToString(S,CharCount);
  end;
  905. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  906. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  { Same operation as StringToWideChar, which does all the work. }
  begin
    StringToUnicodeChar:=StringToWideChar(Src,Dest,DestSize);
  end;
  911. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  function WideCharToString(S : PWideChar) : UnicodeString;
  { Returns the null-terminated wide text at S as a UnicodeString. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(WideString(S));
    result:=WideCharLenToString(S,CharCount);
  end;
  916. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  917. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src to wide characters and stores the result, always
    null-terminated, in the buffer Dest.

    Src      : source text; its own code page is used for the conversion
               (placeholder code pages are resolved with
               TranslatePlaceholderCP, as in fpc_ansistr_to_widechararray).
    Dest     : destination buffer.
    DestSize : capacity of Dest in wide characters, terminator included.
               At most DestSize-1 characters are copied.
    Returns Dest. When Dest is nil or DestSize<=0 nothing can be stored,
    not even the terminator, so the buffer is left untouched.
  }
  var
    temp: widestring;
    Len: SizeInt;
  begin
    result:=Dest;
    { without this guard Len would become negative below and the
      terminator would be written in front of the buffer }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),TranslatePlaceholderCP(StringCodePage(Src)),temp,Length(Src));
    Len:=Length(temp);
    { leave room for the terminating null }
    if DestSize<=Len then
      Len:=DestSize-1;
    move(temp[1],Dest^,Len*SizeOf(WideChar));
    Dest[Len]:=#0;
  end;
  931. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  932. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  933. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len UTF-16 code units at S. }
  begin
    SetLength(result,Len);
    { sizeof(UnicodeChar) is the 2 bytes per code unit }
    Move(S^,Pointer(result)^,Len*sizeof(UnicodeChar));
  end;
  939. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Procedure form of UnicodeCharLenToString: the result is stored in Dest. }
  begin
    Dest:=UnicodeCharLenToString(Src,Len);
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Like UnicodeCharLenToString, with the result converted to an AnsiString
    and stored in Dest. }
  begin
    Dest:=AnsiString(UnicodeCharLenToString(Src,Len));
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Like UnicodeCharToString, with the result converted to an AnsiString
    and stored in Dest. }
  begin
    Dest:=AnsiString(UnicodeCharToString(S));
  end;
  952. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  953. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len wide characters at S. }
  begin
    SetLength(result,Len);
    { sizeof(WideChar) is the 2 bytes per character }
    Move(S^,Pointer(result)^,Len*sizeof(WideChar));
  end;
  959. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Procedure form of WideCharLenToString: the result is stored in Dest. }
  begin
    Dest:=WideCharLenToString(Src,Len);
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Like WideCharLenToString, with the result converted to an AnsiString
    and stored in Dest. }
  begin
    Dest:=AnsiString(WideCharLenToString(Src,Len));
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Procedure form of WideCharToString: the result is stored in Dest. }
  begin
    Dest:=WideCharToString(S);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Like WideCharToString, with the result converted to an AnsiString
    and stored in Dest. }
  begin
    Dest:=AnsiString(WideCharToString(S));
  end;
  976. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  977. {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Make sure reference count of S is 1.
    When the count differs from 1 the characters (terminator included) are
    copied into a new string, the old reference is dropped and S is
    redirected to the copy. Returns the resulting value of S.
  }
  Var
    Copy : Pointer;
    Len : SizeInt;
  begin
    result:=Pointer(S);
    { nil string, or already the only reference: nothing to do }
    if Pointer(S)=nil then
      exit;
    if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      exit;
    Len:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
    Copy:=NewUnicodeString(Len);
    { Len+1: the terminating null is copied as well }
    Move(PUnicodeChar(S)^,Copy^,(Len+1)*sizeof(UnicodeChar));
    PUnicodeRec(Copy-UnicodeFirstOff)^.len:=Len;
    fpc_unicodestr_decr_ref(Pointer(S)); { Thread safe }
    Pointer(S):=Copy;
    result:=Copy;
  end;
  1001. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  1002. {$ifndef FPC_HAS_UNICODESTR_COPY}
  1003. {$define FPC_HAS_UNICODESTR_COPY}
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns up to Size characters of S starting at the 1-based position
    Index. An Index below 1 is treated as 1; Size is clipped to the end of
    S. The result is empty when nothing is left to copy.
  }
  var
    NewData : Pointer;
    SrcLen : SizeInt;
  begin
    NewData:=nil;
    SrcLen:=Length(S);
    { switch to a 0-based index, clamped at the start of the string }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Check Size. Accounts for Zero-length S, the double check is needed because
      Size can be maxint and will get <0 when adding index }
    if (Size>SrcLen) or
       (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        NewData:=NewUnicodeString(Size);
        Move(PUnicodeChar(S)[Index],NewData^,Size*sizeof(UnicodeChar));
        PUnicodeRec(NewData-UnicodeFirstOff)^.Len:=Size;
        PUnicodeChar(NewData+Size*sizeof(UnicodeChar))^:=#0;
      end;
    { drop whatever the result held before installing the new data }
    fpc_unicodestr_decr_ref(Pointer(result));
    Pointer(result):=NewData;
  end;
  1027. {$endif FPC_HAS_UNICODESTR_COPY}
  1028. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1029. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of Substr in
    Source at or after position Offset, or 0 when there is none. Also
    returns 0 for an empty Substr or an Offset outside 1..Length(Source).
  }
  var
    Start,LastStart,SubLen,SrcLen : SizeInt;
    Cursor : punicodechar;
  begin
    result:=0;
    SubLen:=Length(Substr);
    SrcLen:=Length(Source);
    if (SubLen=0) or (Offset<=0) or (Offset>SrcLen) then
      exit;
    { last position at which a complete match still fits }
    LastStart:=SrcLen-SubLen+1;
    Cursor:=@Source[Offset];
    for Start:=Offset to LastStart do
      begin
        { cheap first-character test before the full comparison }
        if (Substr[1]=Cursor^) and
           (CompareWord(Substr[1],Cursor^,SubLen)=0) then
          begin
            result:=Start;
            exit;
          end;
        inc(Cursor);
      end;
  end;
  1054. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1055. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1056. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1057. { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c in s at or
    after position Offset, or 0 when there is none or Offset lies outside
    1..length(s).
  }
  var
    Hit : SizeInt;
  begin
    result:=0;
    if (Offset<=0) or (Offset>length(s)) then
      exit;
    { scan the remaining length(s)-Offset+1 characters; -1 means not found }
    Hit:=IndexWord(s[Offset],length(s)-Offset+1,word(c));
    if Hit>=0 then
      result:=Offset+Hit;
  end;
  1078. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1079. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1080. block, which is significant bloat without any sensible speed improvement. }
  Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts c to a UnicodeString and searches for it in s from Offset on. }
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;
  Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts c to a UnicodeString and searches for it in s from Offset on. }
  begin
    Pos:=Pos(UnicodeString(c),s,Offset);
  end;
  Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  { Converts s to a UnicodeString and searches for c in it from Offset on. }
  begin
    Pos:=Pos(c,UnicodeString(s),Offset);
  end;
  1093. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1094. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1095. { Faster version for a char alone. Must be implemented because }
  1096. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1097. { using pos(char,pchar) will always call the shortstring version }
  1098. { (exact match for first argument), also with $h+ (JM) }
  Function Pos (c : Char; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c (converted
    to a unicodechar) in s at or after position Offset, or 0 when there is
    none or Offset lies outside 1..Length(s).
  }
  var
    Idx : SizeInt;
    Needle : unicodechar;
    Cursor : punicodechar;
  begin
    result:=0;
    if (Offset<=0) or (Offset>Length(s)) then
      exit;
    Needle:=c;
    Cursor:=@s[Offset];
    Idx:=Offset;
    while Idx<=Length(s) do
      begin
        if Cursor^=Needle then
          begin
            result:=Idx;
            exit;
          end;
        inc(Cursor);
        inc(Idx);
      end;
  end;
  1121. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1122. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1123. {$define FPC_HAS_DELETE_UNICODESTR}
  Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes Size characters from S starting at the 1-based position Index.
    Nothing happens when Index lies outside 1..Length(S) or Size<=0; a Size
    reaching past the end of S removes everything from Index on.
  }
  Var
    OldLen,
    Remaining : SizeInt;
  begin
    OldLen:=Length(S);
    if (Index>OldLen) or (Index<=0) or (Size<=0) then
      exit;
    UniqueString(S);
    { number of characters that follow position Index.
      (Size+Index) would overflow if Size=MaxInt, so compare against this. }
    Remaining:=OldLen-Index;
    if Size>Remaining then
      { everything from Index to the end goes away: nothing to move }
      Size:=Remaining+1
    else
      { close the gap; the count includes the terminating null }
      Move(PUnicodeChar(S)[Index-1+Size],PUnicodeChar(S)[Index-1],(Remaining-Size+2)*sizeof(UnicodeChar));
    SetLength(S,OldLen-Size);
  end;
  1142. {$endif FPC_HAS_DELETE_UNICODESTR}
  1143. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1144. {$define FPC_HAS_INSERT_UNICODESTR}
  Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the 1-based position Index.
    Index is clamped to 1..Length(S)+1, so out-of-range values prepend or
    append. An empty Source leaves S unchanged.
  }
  var
    Merged : UnicodeString;
    OldLen,
    AddLen : SizeInt;
  begin
    AddLen:=Length(Source);
    if AddLen=0 then
      exit;
    if Index<=0 then
      Index:=1;
    OldLen:=Length(S);
    if Index>OldLen then
      Index:=OldLen+1;
    { from here on Index is the 0-based insertion point }
    Dec(Index);
    SetLength(Merged,AddLen+OldLen);
    { head of S, then Source, then the tail of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],AddLen*sizeof(UnicodeChar));
    if (OldLen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[AddLen+Index],(OldLen-Index)*sizeof(UnicodeChar));
    S:=Merged;
  end;
  1166. {$endif FPC_HAS_INSERT_UNICODESTR}
  1167. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1168. {$define FPC_HAS_UPCASE_UNICODECHAR}
  Function UpCase(c:UnicodeChar):UnicodeChar;
  { Upper-cases c via the widestring manager: c is wrapped in a
    one-character string and the first character of the converted string
    is returned. }
  var
    Single,
    Upper : UnicodeString;
  begin
    Single:=c;
    Upper:=widestringmanager.UpperUnicodeStringProc(Single);
    result:=Upper[1];
  end;
  1176. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1177. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1178. {$define FPC_HAS_UPCASE_UNICODESTR}
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns s upper-cased by the installed widestring manager. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  1183. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1184. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1185. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  { Lower-cases c via the widestring manager: c is wrapped in a
    one-character string and the first character of the converted string
    is returned. }
  var
    Single,
    Lower : UnicodeString;
  begin
    Single:=c;
    Lower:=widestringmanager.LowerUnicodeStringProc(Single);
    result:=Lower[1];
  end;
  1193. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1194. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1195. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  function LowerCase(const s : UnicodeString) : UnicodeString;
  { Returns s lower-cased by the installed widestring manager. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(s);
  end;
  1200. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1201. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1202. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  {
    Sets S to a string of Len characters. When Buf is not nil (and Len>0)
    the characters are copied from Buf; otherwise only the length is set
    and no characters are copied in.
  }
  begin
    SetLength(S,Len);
    if (Len<=0) or (Buf=nil) then
      exit;
    Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  1209. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1210. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1211. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  {
    Sets S to the first Len chars at Buf, converted from
    DefaultSystemCodePage. When Buf is nil or Len<=0 no conversion takes
    place and S is merely resized to Len.
  }
  begin
    if (Buf=nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1219. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1220. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  {
    Val for a floating point value held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  1234. {$endif}
  1235. {$ifndef FPC_STR_ENUM_INTERN}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  {
    Val helper for enumeration values held in a UnicodeString.
    s is converted to a shortstring and handed to the shortstring Val.
    Input longer than 255 characters is rejected with code=256 and a
    result of 0, like the other Val helpers for UnicodeString.

    NOTE(review): str2ordindex is never used and the text is parsed with
    the integer Val - confirm whether this should go through the
    shortstring enum lookup instead.
  }
  var
    ss: ShortString;
  begin
    { give the result a defined value on every path; previously it was
      left uninitialised when the input was too long }
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=ShortString(s);
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  1248. {$endif FPC_STR_ENUM_INTERN}
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  {
    Val for a Currency value held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,result,Code);
      end
    else
      begin
        result:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  {
    Val for a ValUInt held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  {
    Val for a signed integer held in a UnicodeString.
    S is converted to a shortstring and passed, together with DestSize, to
    int_Val_SInt_ShortStr. Input longer than 255 characters is rejected
    with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
  end;
  1290. {$ifndef CPU64}
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  {
    Val for a qword held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  {
    Val for an Int64 held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  1317. {$endif CPU64}
  1318. {$if defined(CPU16) or defined(CPU8)}
  Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  {
    Val for a longword held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  {
    Val for a LongInt held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  {
    Val for a word held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  {
    Val for a SmallInt held in a UnicodeString.
    S is converted to a shortstring and parsed by the shortstring Val.
    Input longer than 255 characters is rejected with Code=256, result 0.
  }
  Var
    Narrow : ShortString;
  begin
    result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=ShortString(S);
    Val(Narrow,result,Code);
  end;
  1371. {$endif CPU16 or CPU8}
  1372. {$ifndef FPUNONE}
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  { Str for floating point values: d is formatted into a shortstring by
    str_real (rt is passed on as treal_type) and the text is then
    converted to a UnicodeString. }
  var
    Buf : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Buf);
    s:=UnicodeString(Buf);
  end;
  1380. {$endif}
  1381. {$ifndef FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  { Str for enumeration values: the text is produced by fpc_shortstr_enum
    and then converted to a UnicodeString. }
  var
    Buf : ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Buf);
    s:=UnicodeString(Buf);
  end;
  1389. {$endif FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  { Str for boolean values: the text is produced by fpc_shortstr_bool and
    then converted to a UnicodeString. }
  var
    Buf : ShortString;
  begin
    fpc_shortstr_bool(b,len,Buf);
    s:=UnicodeString(Buf);
  end;
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  { Str for Currency values: c is formatted as c:len:fr into a shortstring
    which is then converted to a UnicodeString. }
  var
    Buf : shortstring;
  begin
    str(c:len:fr,Buf);
    s:=UnicodeString(Buf);
  end;
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for ValSInt values: v is formatted as v:Len into a shortstring
    which is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for ValUInt values: v is formatted as v:Len into a shortstring
    which is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  1418. {$ifndef CPU64}
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Int64 values: v is formatted as v:Len into a shortstring which
    is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Qword values: v is formatted as v:Len into a shortstring which
    is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  1433. {$endif CPU64}
  1434. {$if defined(CPU16) or defined(CPU8)}
  Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongInt values: v is formatted as v:Len into a shortstring
    which is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongWord values: v is formatted as v:Len into a shortstring
    which is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for SmallInt values: v is formatted as v:Len into a shortstring
    which is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Word values: v is formatted as v:Len into a shortstring which
    is then converted to a UnicodeString. }
  Var
    Buf : ShortString;
  begin
    Str(v:Len,Buf);
    S:=UnicodeString(Buf);
  end;
  1463. {$endif CPU16 or CPU8}
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts the null-terminated UTF-16 text at Source to UTF-8 by calling
    the four-argument overload with Length(Source) as the source length.
    Returns 0 when Source is nil. }
  begin
    Result:=0;
    if assigned(Source) then
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source));
  end;
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  {
    Converts SourceChars UTF-16 code units at Source to UTF-8.

    Dest         : output buffer, or nil to only compute the required size.
    MaxDestBytes : capacity of Dest in bytes (ignored when Dest is nil).
    Source       : UTF-16 input; when nil the function returns 0.
    SourceChars  : number of code units to read from Source.

    Returns the number of bytes produced (or, with Dest=nil, required)
    plus one for the terminating #0.

    With a buffer the output is always #0-terminated, provided
    MaxDestBytes>0: if the encoded data fills the buffer completely, the
    last byte is overwritten by the terminator. A multi-byte sequence is
    only started when at least one byte remains after it. Surrogates that
    are not part of a valid high/low pair produce no output.

    When EXCLUDE_COMPLEX_PROCS is defined the function only raises
    runtime error 217.
  }
  {$ifdef EXCLUDE_COMPLEX_PROCS}
  begin
    runerror(217);
  end;
  {$else EXCLUDE_COMPLEX_PROCS}
  var
    i,j : SizeUInt;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            lw:=ord(Source[i]);
            case lw of
              0..$7f:
                begin
                  Dest[j]:=char(lw);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (lw shr 6));
                  Dest[j+1]:=char($80 or (lw and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (lw shr 12));
                  Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
                  Dest[j+2]:=char($80 or (lw and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  if j+3>=MaxDestBytes then
                    break;
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { $d7c0 is ($d800 - ($10000 shr 10)) }
                      lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { A zero-sized buffer cannot even hold the terminator: do not touch
          it. (MaxDestBytes-1 would also wrap around for an unsigned 0.) }
        if MaxDestBytes>0 then
          begin
            if j>MaxDestBytes-1 then
              j:=MaxDestBytes-1;
            Dest[j]:=#0;
          end;
      end
    else
      begin
        { size computation only: same classification, nothing is written }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    result:=j+1;
  end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts the null-terminated text at Source by calling the
    four-argument overload with length(Source) as the source length.
    Returns 0 when Source is nil. }
  begin
    Result:=0;
    if assigned(Source) then
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source));
  end;
  { Converts SourceBytes bytes of UTF-8 text at Source into UTF-16 code units.

    Dest         - output buffer.  When nil nothing is stored and only the
                   size of the converted text is computed.
    MaxDestChars - capacity of Dest in UTF-16 code units; not used when Dest
                   is nil.
    Source       - UTF-8 input.  When nil the function returns 0.
    SourceBytes  - number of bytes that may be read from Source.

    Returns the number of UTF-16 code units produced plus one.  This routine
    itself never stores a terminating #0.

    Malformed input does not abort the conversion; the offending bytes are
    replaced by UNICODE_INVALID ('?'):
      - a sequence whose announced length does not fit into the remaining
        SourceBytes is reduced to its lead byte (one '?'), without ever
        reading at or beyond Source[SourceBytes];
      - overlong forms, encoded surrogates, U+FFFE/U+FFFF, values above
        U+10FFFF and lead bytes announcing more than four bytes give '?';
      - when a continuation byte is missing in the middle of a sequence, the
        bytes read so far are decoded with the rules of the shortened length
        (historical behaviour, kept for compatibility).
    A character outside the BMP is stored as a surrogate pair only when both
    halves fit into Dest; otherwise its bytes are consumed and nothing is
    stored for it. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  {$ifdef EXCLUDE_COMPLEX_PROCS}
    begin
      runerror(217);
    end;
  {$else EXCLUDE_COMPLEX_PROCS}
    const
      UNICODE_INVALID=63;
    var
      InputUTF8: SizeUInt;
      OutputUnicode: SizeUInt;
      CharLen: SizeUInt;
      UC: SizeUInt;

    { Decodes the multi-byte sequence whose lead byte (high bit set) is at
      Source[Start].  Stores the decoded value, or UNICODE_INVALID, in Value
      and returns the number of source bytes consumed (always >= 1). }
    function DecodeSequence(Start: SizeUInt; out Value: SizeUInt): SizeUInt;
      var
        Shifted: Byte;
        SeqLen, LookAhead: SizeUInt;
      begin
        { the number of leading 1 bits of the lead byte is the announced length }
        Shifted:=byte(Source[Start]);
        SeqLen:=0;
        while (Shifted and $80)<>0 do
          begin
            Shifted:=(Shifted shl 1) and $FE;
            inc(SeqLen);
          end;
        { The last byte of the sequence has index Start+SeqLen-1 and must be
          below SourceBytes.  If it is not, there are insufficient bytes left:
          fall back to a single (invalid) byte. }
        if Start+SeqLen>SourceBytes then
          SeqLen:=1;
        { every trailing byte must match the 10xxxxxx pattern }
        for LookAhead:=1 to SeqLen-1 do
          if (byte(Source[Start+LookAhead]) and $C0)<>$80 then
            begin
              { invalid UTF-8 sequence, cut it at the offending byte }
              SeqLen:=LookAhead;
              break;
            end;
        case SeqLen of
          1:
            { stray continuation byte or truncated sequence }
            Value:=UNICODE_INVALID;
          2:
            begin
              Value:=SizeUInt(byte(Source[Start]) and $1F) shl 6;
              Value:=Value or (byte(Source[Start+1]) and $3F);
              { overlong encoding of an ASCII character }
              if Value<=$7F then
                Value:=UNICODE_INVALID;
            end;
          3:
            begin
              Value:=SizeUInt(byte(Source[Start]) and $0F) shl 12;
              Value:=Value or (SizeUInt(byte(Source[Start+1]) and $3F) shl 6);
              Value:=Value or (byte(Source[Start+2]) and $3F);
              { overlong form, non-character or encoded surrogate }
              if (Value<=$7FF) or (Value>=$FFFE) or
                 ((Value>=$D800) and (Value<=$DFFF)) then
                Value:=UNICODE_INVALID;
            end;
          4:
            begin
              Value:=SizeUInt(byte(Source[Start]) and $07) shl 18;
              Value:=Value or (SizeUInt(byte(Source[Start+1]) and $3F) shl 12);
              Value:=Value or (SizeUInt(byte(Source[Start+2]) and $3F) shl 6);
              Value:=Value or (byte(Source[Start+3]) and $3F);
              { overlong form or beyond the Unicode range }
              if (Value<$10000) or (Value>$10FFFF) then
                Value:=UNICODE_INVALID;
            end;
          else
            { 5..8 byte forms are not valid UTF-8 }
            Value:=UNICODE_INVALID;
        end;
        DecodeSequence:=SeqLen;
      end;

    begin
      if not assigned(Source) then
        begin
          result:=0;
          exit;
        end;
      InputUTF8:=0;
      OutputUnicode:=0;
      if Assigned(Dest) then
        begin
          while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
            begin
              if (byte(Source[InputUTF8]) and $80)=0 then
                begin
                  { one byte US-ASCII maps directly to one code unit }
                  Dest[OutputUnicode]:=WideChar(byte(Source[InputUTF8]));
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  CharLen:=DecodeSequence(InputUTF8,UC);
                  inc(InputUTF8,CharLen);
                  if UC>=$10000 then
                    begin
                      { only store the surrogate pair if there is room for
                        both halves, otherwise drop the character }
                      if OutputUnicode+1<MaxDestChars then
                        begin
                          dec(UC,$10000);
                          Dest[OutputUnicode]:=WideChar((UC shr 10)+$D800);
                          Dest[OutputUnicode+1]:=WideChar((UC and $3ff)+$DC00);
                          inc(OutputUnicode,2);
                        end;
                    end
                  else
                    begin
                      Dest[OutputUnicode]:=WideChar(UC);
                      inc(OutputUnicode);
                    end;
                end;
            end;
        end
      else
        begin
          { counting pass: same walk over the input, nothing is stored }
          while InputUTF8<SourceBytes do
            begin
              if (byte(Source[InputUTF8]) and $80)=0 then
                begin
                  inc(OutputUnicode);
                  inc(InputUTF8);
                end
              else
                begin
                  CharLen:=DecodeSequence(InputUTF8,UC);
                  inc(InputUTF8,CharLen);
                  if UC>=$10000 then
                    { surrogate pair }
                    inc(OutputUnicode,2)
                  else
                    inc(OutputUnicode);
                end;
            end;
        end;
      Result:=OutputUnicode+1;
    end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { RawByteString overload: the input is first converted to a UnicodeString
    and then handed to the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
    var
      wide : UnicodeString;
    begin
      wide:=UnicodeString(s);
      Result:=UTF8Encode(wide);
    end;
  {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  { Encodes a UnicodeString as UTF-8.  A scratch buffer of three bytes per
    UTF-16 code unit is allocated, filled by UnicodeToUtf8 and then trimmed
    to the reported size minus one.  An empty input, or a non-positive
    result from UnicodeToUtf8, gives an empty string. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
    var
      written : SizeInt;
      buf : UTF8String;
    begin
      result:='';
      if s<>'' then
        begin
          SetLength(buf,length(s)*3);
          written:=UnicodeToUtf8(pchar(buf),length(buf)+1,PUnicodeChar(s),length(s));
          if written>0 then
            begin
              SetLength(buf,written-1);
              result:=buf;
            end;
        end;
    end;
  {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Decodes UTF-8 data into a UnicodeString.  A scratch string of one code
    unit per input byte is allocated, filled by Utf8ToUnicode and then
    trimmed to the reported size minus one.  An empty input, or a
    non-positive result from Utf8ToUnicode, gives an empty string. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
    var
      produced : SizeInt;
      buf : UnicodeString;
    begin
      result:='';
      if s<>'' then
        begin
          SetLength(buf,length(s));
          produced:=Utf8ToUnicode(PUnicodeChar(buf),length(buf)+1,pchar(s),length(s));
          if produced>0 then
            begin
              SetLength(buf,produced-1);
              result:=buf;
            end;
        end;
    end;
  {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Thin alias: returns Utf8Encode(s). }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      AnsiToUtf8:=Utf8Encode(s);
    end;
  { Decodes s with Utf8Decode and converts the resulting UnicodeString back
    to a RawByteString. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    var
      decoded : UnicodeString;
    begin
      decoded:=Utf8Decode(s);
      Result:=RawByteString(decoded);
    end;
  { Converts len UTF-16 code units at p into a UCS4String stored in res.
    A high surrogate immediately followed (within len) by a low surrogate is
    combined into one code point; every other code unit, including an
    unpaired surrogate, is copied through unchanged.  res always receives
    one extra trailing element set to 0. }
  procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
    var
      src, dst: sizeint;
      w: longint;

    { True when p[idx] is a high surrogate and p[idx+1] exists and is a low
      surrogate.  The bounds test comes before the read of p[idx+1]. }
    function PairAt(idx: sizeint): boolean;
      begin
        PairAt:=(word(p[idx])>=$d800) and (word(p[idx])<=$dbff) and
                (idx+1<len) and
                (word(p[idx+1])>=$dc00) and (word(p[idx+1])<=$dfff);
      end;

    begin
      { first pass: count the code points }
      dst:=0;
      src:=0;
      while src<len do
        begin
          if PairAt(src) then
            inc(src,2)
          else
            inc(src);
          inc(dst);
        end;
      SetLength(res,dst+1); { +1 for null termination }
      { second pass: convert }
      dst:=0;
      src:=0;
      while src<len do
        begin
          w:=ord(p[src]);
          if PairAt(src) then
            begin
              { $d7c0 is ($d800 - ($10000 shr 10)) }
              res[dst]:=(UCS4Char(w-$d7c0) shl 10)+(UCS4Char(p[src+1]) xor $dc00);
              inc(src,2);
            end
          else
            begin
              { BMP character or unpaired surrogate: copied as is }
              res[dst]:=w;
              inc(src);
            end;
          inc(dst);
        end;
      res[dst]:=0;
    end;
  {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  { Converts a UnicodeString to a UCS4String by passing its characters and
    length to UCS4Encode. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
    var
      units : SizeInt;
    begin
      units:=Length(s);
      UCS4Encode(PWideChar(s),units,result);
    end;
  {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  { Converts a WideString to a UCS4String by passing its characters and
    length to UCS4Encode. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
    var
      units : SizeInt;
    begin
      units:=Length(s);
      UCS4Encode(PWideChar(s),units,result);
    end;
  {$endif FPC_HAS_WIDESTR_TO_WIDESTR_TO_UCS4STRING}
  1969. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1970. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  { Writes the UTF-16 form of s to dest.  The last element of s is taken to
    be the explicit terminating #0 and is not converted.  dest should point
    to a previously allocated wide/unicodestring that is large enough: one
    code unit per element, two for elements in $10000..$10ffff.  Elements
    above $10ffff are written as '?'. }
  procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
    var
      idx, outpos: sizeint;
      cp: UCS4Char;
    begin
      outpos:=0;
      idx:=0;
      while idx<=length(s)-2 do { -2 because s contains explicit terminating #0 }
        begin
          cp:=s[idx];
          if (cp<=$ffff) then
            begin
              dest[outpos]:=widechar(cp);
              inc(outpos);
            end
          else if (dword(cp)<=$10ffff) then
            begin
              { surrogate pair; subtracting $10000 doesn't change low 10 bits }
              dest[outpos]:=widechar((cp shr 10) + $d7c0);
              dest[outpos+1]:=widechar((cp and $3ff) + $dc00);
              inc(outpos,2);
            end
          else
            begin
              { invalid code point }
              dest[outpos]:='?';
              inc(outpos);
            end;
          inc(idx);
        end;
    end;
  { Converts a UCS4String (whose last element is the terminating #0) to a
    UnicodeString: the result is sized first, two code units for elements
    in $10000..$10ffff and one for all others, then filled by UCS4Decode. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      for idx:=0 to length(s)-2 do { skip terminating #0 }
        if (s[idx]>$ffff) and (s[idx]<=$10ffff) then
          Inc(units,2)
        else
          Inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  { Converts a UCS4String (whose last element is the terminating #0) to a
    WideString: the result is sized first, two code units for elements in
    $10000..$10ffff and one for all others, then filled by UCS4Decode. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      for idx:=0 to length(s)-2 do { skip terminating #0 }
        if (s[idx]>$ffff) and (s[idx]<=$10ffff) then
          Inc(units,2)
        else
          Inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  2016. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  2017. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  const
    { messages written by unimplementedunicodestring }
    SNoUnicodestrings = 'This binary has no string conversion support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';

  { Reports a call to a string-manager entry that has no implementation:
    when console I/O is compiled in and IsConsole is set, both messages are
    written to StdErr; afterwards run-time error 233 is raised through
    HandleErrorAddrFrameInd. }
  procedure unimplementedunicodestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      if IsConsole then
        begin
          Writeln(StdErr,SNoUnicodestrings);
          Writeln(StdErr,SRecompileWithUnicodestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorAddrFrameInd(233,get_pc_addr,get_frame);
    end;
  { Returns the ElementSize field of the string's header record, or
    SizeOf(UnicodeChar) for an empty (nil) string. }
  function StringElementSize(const S: UnicodeString): Word; overload;
    begin
      Result:=SizeOf(UnicodeChar);
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
    end;
  { Returns the Ref field of the string's header record, or 0 for an empty
    (nil) string. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
    begin
      Result:=0;
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
    end;
  { Returns the CodePage field of the string's header record when
    FPC_HAS_CPSTRING is defined and the string is not empty; in every other
    case DefaultUnicodeCodePage. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
    begin
      Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
    end;
  { The stubs below are installed by initunicodestringmanager.  Each one
    only calls unimplementedunicodestring and deliberately assigns no
    function result, hence the warnings are switched off around them. }
  {$warnings off}

  { stub for the upper/lower case conversion of a UnicodeString }
  function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring;
    end;

  { stub for the comparison of two UnicodeStrings }
  function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
    begin
      unimplementedunicodestring;
    end;

  { stub for the upper/lower case conversion of a WideString }
  function StubWideCase(const s: WideString): WideString;
    begin
      unimplementedunicodestring;
    end;

  { stub for the comparison of two WideStrings }
  function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
    begin
      unimplementedunicodestring;
    end;

  {$warnings on}
  { Fills the global widestringmanager record with the built-in handlers.
    The comparison, length and code page entries are always installed; the
    move and case-conversion entries only when HAS_WIDESTRINGMANAGER is not
    defined.  Case conversion and comparison point at stubs. }
  procedure initunicodestringmanager;
    begin
      { entries that are installed unconditionally }
      widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
      widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
      widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
      widestringmanager.CompareUnicodeStringProc:=@StubCompareUnicodeString;
  //    widestringmanager.CompareTextWideStringProc:=@StubCompareWideString;
      widestringmanager.CompareWideStringProc:=@StubCompareWideString;
  {$ifndef HAS_WIDESTRINGMANAGER}
      { unicodestring entries }
      widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
      widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
      widestringmanager.UpperUnicodeStringProc:=@StubUnicodeCase;
      widestringmanager.LowerUnicodeStringProc:=@StubUnicodeCase;
      { widestring entries }
      widestringmanager.Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
      widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
      widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
      widestringmanager.UpperWideStringProc:=@StubWideCase;
      widestringmanager.LowerWideStringProc:=@StubWideCase;
  {$endif HAS_WIDESTRINGMANAGER}
    end;
  2096. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  { Converts Str to a RawByteString by calling the installed
    Unicode2AnsiMoveProc with DefaultFileSystemCodePage as target code page. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
    var
      units: SizeInt;
    Begin
      units:=Length(Str);
      widestringmanager.Unicode2AnsiMoveProc(punicodechar(Str),Result,
        DefaultFileSystemCodePage,units);
    End;
  {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Converts the text held in arr to a RawByteString by calling the installed
    Unicode2AnsiMoveProc with DefaultFileSystemCodePage as target code page.

    The text ends at the first #0 element.  The search for that terminator
    is limited to the elements of arr, so an array without a terminator is
    converted in full instead of being read past its end, and an empty array
    is passed on with a length of 0 without indexing arr[0]. }
  Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
    var
      units: SizeInt;
    Begin
      units:=0;
      while (units<=high(arr)) and (arr[units]<>#0) do
        inc(units);
      widestringmanager.Unicode2AnsiMoveProc(punicodechar(@arr),Result,
        DefaultFileSystemCodePage,units);
    End;
  {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Returns a copy of Str on which SetCodePage has been called with
    DefaultFileSystemCodePage and the conversion flag set to True. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
    Begin
      Result:=Str;
      { third argument True: SetCodePage is asked to convert the data }
      SetCodePage(Result,DefaultFileSystemCodePage,True);
    End;
  { Delphi compatibility: the bytes of S are always interpreted as UTF-8,
    whatever code page the string carries.  Simply forwards to UTF8Decode. }
  function UTF8ToString(const S: RawByteString): UnicodeString; inline;
    begin
      UTF8ToString:=UTF8Decode(S);
    end;
  { ShortString overload: the characters are copied into a RawByteString,
    which is then decoded with UTF8Decode. }
  function UTF8ToString(const S: ShortString): UnicodeString;
    var
      bytes: RawByteString;
    begin
      bytes:=S;
      UTF8ToString:=UTF8Decode(bytes);
    end;
  { PAnsiChar overload: copies length(S) bytes from S into a RawByteString
    and decodes that string as UTF-8.

    The byte count is kept in a SizeInt, the type used for string lengths,
    so that a long input is not truncated by a narrower Integer. }
  function UTF8ToString(const S: PAnsiChar): UnicodeString;
    var
      rs: RawByteString;
      Count: SizeInt;
    begin
      Count := length(S);
      SetLength(rs, Count);
      { nothing to copy for an empty input }
      if Count > 0 then
        fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
      Result := UTF8ToString(rs);
    end;
  2142. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2143. are as well }
  2144. {$ifndef CPUJVM}
  { Open array of AnsiChar overload: copies all Length(S) elements into a
    RawByteString and decodes that string as UTF-8.

    The element count is kept in a SizeInt, the type used for string
    lengths, so that a large array is not truncated by a narrower Integer. }
  function UTF8ToString(const S: array of AnsiChar): UnicodeString;
    var
      rs: RawByteString;
      Count: SizeInt;
    begin
      Count := Length(S);
      SetLength(rs, Count);
      { nothing to copy for an empty array }
      if Count > 0 then
        fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
      Result := UTF8ToString(rs);
    end;
  { Open array of Byte overload: copies all Length(S) bytes into a
    RawByteString and decodes that string as UTF-8.

    The element count is kept in a SizeInt, the type used for string
    lengths, so that a large array is not truncated by a narrower Integer. }
  function UTF8ToString(const S: array of Byte): UnicodeString;
    var
      rs: RawByteString;
      Count: SizeInt;
    begin
      Count := Length(S);
      SetLength(rs, Count);
      { nothing to copy for an empty array }
      if Count > 0 then
        fpc_pchar_ansistr_intern_charmove(pchar(@S),Low(S),rs,0,Count);
      Result := UTF8ToString(rs);
    end;
  2167. {$endif not CPUJVM}