ustrings.inc 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  {
    Implementation of the UnicodeString type and everything it needs.

    A UnicodeString is a 'silent' PUnicodeChar: the variable holds a pointer
    to the first character, and a TUnicodeRec header (declared below) sits
    directly in front of those characters. The last two header fields are:

      Ref : SizeInt  reference count
      Len : SizeInt  length in characters; multiply by SizeOf(UnicodeChar)
                     to get the number of bytes. The original notes that
                     this is compatible with Delphi.

    followed at the pointer itself by the string and a terminating #0.

    PUnicodeChar(UnicodeString) is a valid typecast, so WS[i] is converted
    to the address @WS+i-1.

    Constants should be assigned a reference count of -1, meaning that they
    can not be disposed of.
  }
  type
    PUnicodeRec = ^TUnicodeRec;
    TUnicodeRec = record
      CodePage    : TSystemCodePage;
      ElementSize : Word;
  {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
  {$endif CPU64}
      Ref         : SizeInt;
      Len         : SizeInt;
    end;

  const
    { Distance in bytes from the start of the header to the first character. }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  {$endif FPC_UNICODESTRING_TYPE_DEFINED}
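  {
    Default UnicodeChar <-> Char conversion: when converting to Char, only
    code points below 256 are kept (stored as the byte of the same value);
    all others are translated to '?'. When converting to UnicodeChar, every
    byte is widened to the code point of the same value.
    These routines can be overridden for the current locale.
  }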
  {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Default UnicodeChar -> AnsiChar conversion.

    source : first of the len UTF-16 code units to convert
    dest   : receives exactly len bytes and is tagged with code page cp
    cp     : code page recorded on dest; no real transcoding is done here
    len    : number of code units to convert

    Code units below 256 are stored as the byte of the same value, all
    others become '?'.
  }
    var
      i : SizeInt;
      p : PAnsiChar;
    begin
      setlength(dest,len);
      if not assigned(pointer(dest)) then
        exit;
      { Tag the string through the public API. The previous code wrote
        through PAnsiRec(dest), which is the address of the first character
        and not of the header, so the code page landed in the payload and,
        for len=1, on top of the terminating #0. }
      SetCodePage(dest,cp,false);
      p:=pointer(dest);         { dest is unique after SetLength }
      for i:=1 to len do
        begin
          if word(source^)<256 then
            p^:=char(word(source^))
          else
            p^:='?';
          inc(source);
          inc(p);
        end;
    end;
  {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Default AnsiChar -> UnicodeChar conversion: resizes dest to len
    characters and widens every source byte to the UnicodeChar with the
    same ordinal value. The cp argument is not consulted.
  }
    var
      remaining : SizeInt;
      target : PUnicodeChar;
    begin
      setlength(dest,len);
      { SetLength guarantees that dest is unique }
      target:=pointer(dest);
      remaining:=len;
      while remaining>0 do
        begin
          target^:=unicodechar(byte(source^));
          inc(target);
          inc(source);
          dec(remaining);
        end;
    end;
  {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  88. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  { Default implementation: reports Length(Str) for the zero-terminated
    string Str. }
    begin
      Result:=length(Str);
    end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  { Default implementation: yields 1 when Str points at a character other
    than #0 and 0 when it points at #0. MaxLookAead is not consulted. }
    begin
      if Str^=#0 then
        Result:=0
      else
        Result:=1;
    end;
  100. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  { Default implementation: answers DefaultSystemCodePage for every stdcp. }
    begin
      { do not raise an exception here, text file handling relies on it }
      DefaultGetStandardCodePage:=DefaultSystemCodePage;
    end;
  { Accessors for the global widestringmanager record. The Unicode* and
    Wide* names read and write the very same variable. }

  { Copies the installed manager into Manager. }
  Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
    begin
      Manager:=widestringmanager;
    end;


  { Installs New and hands the previously installed manager back in Old. }
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
    begin
      Old:=widestringmanager;
      widestringmanager:=New;
    end;


  { Installs New, discarding the previously installed manager. }
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
    begin
      widestringmanager:=New;
    end;


  { Same as GetUnicodeStringManager. }
  Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
    begin
      GetUnicodeStringManager(Manager);
    end;


  { Same as the two-argument SetUnicodeStringManager. }
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
    begin
      SetUnicodeStringManager(New,Old);
    end;


  { Same as the one-argument SetUnicodeStringManager. }
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
    begin
      SetUnicodeStringManager(New);
    end;
  132. {****************************************************************************
  133. Internal functions, not in interface.
  134. ****************************************************************************}
  procedure UnicodeStringError;
  { Reports run-time error 204 through HandleErrorAddrFrameInd, passing the
    current program counter and frame. }
    const
      UnicodeStringErrorCode = 204;
    begin
      HandleErrorAddrFrameInd(UnicodeStringErrorCode,get_pc_addr,get_frame);
    end;
  {$ifndef FPC_HAS_NEW_UNICODESTRING}
  {$define FPC_HAS_NEW_UNICODESTRING}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString on the heap with room for Len characters
    plus the terminating #0.

    The header is set to length Len, reference count 1, code page
    DefaultUnicodeCodePage and element size SizeOf(UnicodeChar); a #0 is
    stored in the first character slot. The function returns the address
    of that first character.

    If GetMem hands back nil, UnicodeStringError is called and nil is
    returned.
  }
    var
      Block : Pointer;
    begin
      GetMem(Block,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
      if Block=nil then
        begin
          UnicodeStringError;
          NewUnicodeString:=nil;
          exit;
        end;
      PUnicodeRec(Block)^.Len:=Len;          { initial length }
      PUnicodeRec(Block)^.Ref:=1;            { initial reference count }
      PUnicodeRec(Block)^.CodePage:=DefaultUnicodeCodePage;
      PUnicodeRec(Block)^.ElementSize:=SizeOf(UnicodeChar);
      inc(Block,UnicodeFirstOff);            { step over the header }
      PUnicodeChar(Block)^:=#0;              { terminating #0 }
      NewUnicodeString:=Block;
    end;
  {$endif FPC_HAS_NEW_UNICODESTRING}
  {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  {$define FPC_HAS_UNICODESTR_DECR_REF}
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Drops one reference from the unicodestring S points at and sets S to
    nil. Strings with a negative reference count (constants) are left
    alone; when the count reaches zero the memory block is released.
  }
    var
      Header : PUnicodeRec;
    begin
      { empty string: nothing to release }
      if S<>nil then
        begin
          Header:=PUnicodeRec(S-UnicodeFirstOff);
          S:=nil;
          { a negative count marks a constant string }
          if Header^.Ref>=0 then
            { declocked does a MT safe dec and returns true if the counter is 0 }
            if declocked(Header^.Ref) then
              FreeMem(Header);
        end;
    end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  {$endif FPC_HAS_UNICODESTR_DECR_REF}
  {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  {$define FPC_HAS_UNICODESTR_INCR_REF}
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S points at. Nothing happens
    for nil or for strings whose reference count is negative (constants).
  }
    begin
      if S<>nil then
        if PUnicodeRec(S-UnicodeFirstOff)^.Ref>=0 then
          inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
    end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  {$endif FPC_HAS_UNICODESTR_INCR_REF}
  {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts the UnicodeString S2 to the ShortString res using the
    installed Unicode2AnsiMoveProc and DefaultSystemCodePage. At most
    high(res) source characters are handed to the converter.
  }
    var
      CharCount : SizeInt;
      converted : ansistring;
    begin
      res:='';
      CharCount:=Length(S2);
      if CharCount<=0 then
        exit;
      if CharCount>high(res) then
        CharCount:=high(res);
      widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),converted,DefaultSystemCodePage,CharCount);
      res:=converted;
    end;
  {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts the ShortString S2 to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage. An empty S2 gives ''.
  }
    begin
      result:='';
      if Length(S2)>0 then
        widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Length(S2));
    end;
  {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts the UnicodeString S2 to an AnsiString in code page cp using
    the installed Unicode2AnsiMoveProc. CP_ACP stands for
    DefaultSystemCodePage. Without FPC_HAS_CPSTRING there is no cp
    parameter and DefaultSystemCodePage is always used.
  }
    var
      CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
      cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    begin
  {$ifndef FPC_HAS_CPSTRING}
      cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
      result:='';
      CharCount:=Length(S2);
      if CharCount<=0 then
        exit;
      if cp=CP_ACP then
        cp:=DefaultSystemCodePage;
      widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,CharCount);
    end;
  {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts the AnsiString S2 to a UnicodeString using the installed
    Ansi2UnicodeMoveProc. The source code page is StringCodePage(S2), with
    CP_ACP standing for DefaultSystemCodePage. An empty S2 gives ''.
  }
    var
      ByteCount : SizeInt;
      SourceCP : TSystemCodePage;
    begin
      result:='';
      ByteCount:=Length(S2);
      if ByteCount<=0 then
        exit;
      SourceCP:=StringCodePage(S2);
      if SourceCP=CP_ACP then
        SourceCP:=DefaultSystemCodePage;
      widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SourceCP,result,ByteCount);
    end;
  {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of S2 unchanged into a WideString of the same
    length. }
    var
      CharCount : SizeInt;
    begin
      CharCount:=Length(S2);
      SetLength(Result,CharCount);
      Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
    end;
  {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of S2 unchanged into a UnicodeString of the same
    length. }
    var
      CharCount : SizeInt;
    begin
      CharCount:=Length(S2);
      SetLength(Result,CharCount);
      Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
    end;
  {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from the zero-terminated wide string at p.
    A nil p gives ''. The length is found with IndexWord searching for 0.
  }
    var
      CharCount : SizeInt;
    begin
      result:='';
      if p<>nil then
        begin
          CharCount:=IndexWord(p^,-1,0);
          Setlength(result,CharCount);
          if CharCount>0 then
            Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
        end;
    end;
  {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts the zero-terminated wide string at p to an AnsiString in code
    page cp using the installed Wide2AnsiMoveProc.

    p  : source; nil or an empty string gives ''
    cp : target code page; CP_ACP stands for DefaultSystemCodePage, as in
         fpc_UnicodeStr_To_AnsiStr. Without FPC_HAS_CPSTRING there is no
         such parameter and DefaultSystemCodePage is always used.
  }
    var
      Size : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
      cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    begin
  {$ifndef FPC_HAS_CPSTRING}
      cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
      result:='';
      if p=nil then
        exit;
      Size:=IndexWord(p^,-1,0);
      if Size>0 then
        begin
          { resolve the placeholder before it reaches the manager, the same
            way the other *_To_AnsiStr helpers in this file do }
          if (cp=CP_ACP) then
            cp:=DefaultSystemCodePage;
          widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
        end;
    end;
  {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts the zero-terminated wide string at p to the ShortString res
    using the installed Wide2AnsiMoveProc and DefaultSystemCodePage.
    A nil p or an empty string gives ''.
  }
    var
      CharCount : SizeInt;
      converted : ansistring;
    begin
      res:='';
      if p<>nil then
        begin
          CharCount:=IndexWord(p^, high(PtrInt), 0);
          if CharCount>0 then
            begin
              widestringmanager.Wide2AnsiMoveProc(p,converted,DefaultSystemCodePage,CharCount);
              res:=converted;
            end;
        end;
    end;
  {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  { Define the symbol the guard above actually tests. The historical
    FPC_UNICODESTR_ASSIGN spelling is kept as well in case anything checks
    for it. }
  {$define FPC_HAS_UNICODESTR_ASSIGN}
  {$define FPC_UNICODESTR_ASSIGN}
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2), taking reference counts into account.

    The reference count of S2 is raised first (only when it is positive,
    so nil and constant strings are skipped), then the old S1 is released
    and S1 is pointed at S2.
  }
    begin
      If S2<>nil then
        If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
          inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
      { Decrease the reference count on the old S1 }
      fpc_unicodestr_decr_ref (S1);
      s1:=s2;
    end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  {$endif FPC_HAS_UNICODESTR_ASSIGN}
  {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  {$define FPC_HAS_UNICODESTR_CONCAT}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1+S2 in DestS. DestS may be the same string as S1, as S2, or
    as both; each of those cases is handled separately below.
  }
    var
      Len1,Len2 : SizeInt;
      BothSame : boolean;
    begin
      { when one side is empty a plain assignment is enough }
      if (S1='') then
        begin
          DestS:=s2;
          exit;
        end;
      if (S2='') then
        begin
          DestS:=s1;
          exit;
        end;
      Len1:=Length(S1);
      Len2:=length(S2);
      { Pointer() typecasts prevent extra conversion code }
      if Pointer(DestS)=Pointer(S1) then
        begin
          { DestS already starts with S1: grow it and append }
          BothSame:=Pointer(S1)=Pointer(S2);
          SetLength(DestS,Len2+Len1);
          if BothSame then
            { S2 is DestS too, so read the tail from DestS itself }
            Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2)*sizeof(UnicodeChar))
          else
            { copy S2 together with its terminating #0 }
            Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
        end
      else if Pointer(DestS)=Pointer(S2) then
        begin
          { DestS holds S2: grow it, shift S2 up, then put S1 in front }
          SetLength(DestS,Len2+Len1);
          Move(Pointer(DestS)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
          Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        end
      else
        begin
          { DestS is unrelated to both operands: start from scratch }
          DestS:='';
          SetLength(DestS,Len2+Len1);
          Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
          Move(Pointer(S2)^,(Pointer(DestS)+Len1*sizeof(UnicodeChar))^,(Len2+1)*sizeof(UnicodeChar));
        end;
    end;
  {$endif FPC_HAS_UNICODESTR_CONCAT}
  {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all elements of sarr in DestS.

    When DestS is the first element and appears nowhere else in sarr, the
    remaining elements are appended to it in place. When DestS appears at
    any later position, an extra reference keeps the original alive while
    DestS is rebuilt from the first element on.

    NOTE(review): an array with exactly one element (high(sarr)=0) yields
    '' and not that element; presumably the compiler never emits such a
    call - confirm.
  }
    var
      idx : Longint;
      firstToCopy : longint;
      piece,writePos : pointer;
      pieceLen,totalLen : SizeInt;
      keptLen : SizeInt;
      pinnedDest : pointer;
    begin
      if high(sarr)=0 then
        begin
          DestS:='';
          exit;
        end;
      pinnedDest:=nil;
      firstToCopy:=low(sarr);
      if Pointer(DestS)=Pointer(sarr[firstToCopy]) then
        inc(firstToCopy);
      { a second use of DestS rules out the append optimization }
      for idx:=firstToCopy to high(sarr) do
        if Pointer(DestS)=Pointer(sarr[idx]) then
          begin
            { DestS is used somewhere in the middle of the expression, so
              the original string must survive emptying/modifying DestS.
              This trick only works with reference counted strings. }
            pinnedDest:=pointer(dests);
            fpc_UnicodeStr_Incr_Ref(pinnedDest);
            firstToCopy:=low(sarr);
            break;
          end;
      { start from an empty DestS when copying begins at the first element }
      if firstToCopy=low(sarr) then
        DestS:='';
      keptLen:=length(DestS);
      { total size first, so a single SetLength call is enough }
      totalLen:=0;
      for idx:=low(sarr) to high(sarr) do
        inc(totalLen,length(sarr[idx]));
      SetLength(DestS,totalLen);
      { append every element except the one already sitting in DestS }
      writePos:=Pointer(DestS)+keptLen*sizeof(UnicodeChar);
      for idx:=firstToCopy to high(sarr) do
        begin
          piece:=pointer(sarr[idx]);
          if assigned(piece) then
            begin
              pieceLen:=length(unicodestring(piece));
              { the +1 carries the terminating #0 along }
              Move(piece^,writePos^,(pieceLen+1)*sizeof(UnicodeChar));
              inc(writePos,pieceLen*sizeof(UnicodeChar));
            end;
        end;
      fpc_UnicodeStr_Decr_Ref(pinnedDest);
    end;
  {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  {$define FPC_HAS_CHAR_TO_UCHAR}
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts the Char c to a UnicodeChar using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.

    Returns the first character of the conversion result, or '?' when the
    conversion produced nothing (the same fallback fpc_UChar_To_Char uses).
  }
    var
      w: unicodestring;
    begin
      widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
      { an installed manager may return an empty string; w[1] would then
        dereference nil }
      if length(w)>=1 then
        fpc_Char_To_UChar:=w[1]
      else
        fpc_Char_To_UChar:='?';
    end;
  {$endif FPC_HAS_CHAR_TO_UCHAR}
  {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  {$define FPC_HAS_CHAR_TO_UNICODESTR}
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  { Converts the single Char c to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage. }
    begin
      widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,fpc_Char_To_UnicodeStr,1);
    end;
  {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  {$define FPC_HAS_UCHAR_TO_CHAR}
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts the UnicodeChar c to a Char using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage. The answer is '?'
    unless the conversion produced exactly one character.
  }
    var
      converted: ansistring;
    begin
      widestringmanager.Unicode2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
      if length(converted)<>1 then
        fpc_UChar_To_Char:='?'
      else
        fpc_UChar_To_Char:=converted[1];
    end;
  {$endif FPC_HAS_UCHAR_TO_CHAR}
  {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  { Converts the WideChar c to the ShortString res using the installed
    Wide2AnsiMoveProc and DefaultSystemCodePage. }
    var
      converted: ansistring;
    begin
      widestringmanager.Wide2AnsiMoveProc(@c,converted,DefaultSystemCodePage,1);
      res:=converted;
    end;
  {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  { Returns a UnicodeString of length 1 whose only character is c. }
    begin
      Setlength(Result,1);
      Result[1]:=c;
    end;
  {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  {$define FPC_HAS_UCHAR_TO_ANSISTR}
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts the UnicodeChar c to an AnsiString in code page cp using the
    installed Unicode2AnsiMoveProc. CP_ACP stands for
    DefaultSystemCodePage. Without FPC_HAS_CPSTRING there is no cp
    parameter and DefaultSystemCodePage is always used.
  }
  {$ifndef FPC_HAS_CPSTRING}
    var
      cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    begin
  {$ifndef FPC_HAS_CPSTRING}
      cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
      if cp=CP_ACP then
        cp:=DefaultSystemCodePage;
      widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
    end;
  {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts the zero-terminated string at p to a UnicodeString using the
    installed Ansi2UnicodeMoveProc and DefaultSystemCodePage. A nil p or
    an empty string gives ''.
  }
    var
      ByteCount : SizeInt;
    begin
      if assigned(p) and (p[0]<>#0) then
        begin
          ByteCount:=IndexChar(p^,-1,#0);
          widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,ByteCount);
        end
      else
        fpc_pchar_to_unicodestr := '';
    end;
  {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts the character array arr to a UnicodeString using the
    installed Ansi2UnicodeMoveProc and DefaultSystemCodePage.

    arr       : source characters; an empty array gives ''
    zerobased : when true, conversion stops at the first #0 in arr (or at
                the end of arr when there is none); when false, the whole
                array is converted
  }
    var
      i : SizeInt;
    begin
      { an empty open array has no arr[0] to look at }
      if high(arr)<0 then
        begin
          fpc_chararray_to_unicodestr:='';
          exit;
        end;
      if zerobased then
        begin
          if arr[0]=#0 Then
            begin
              fpc_chararray_to_unicodestr:='';
              exit;
            end;
          i:=IndexChar(arr,high(arr)+1,#0);
          if i=-1 then
            i:=high(arr)+1;
        end
      else
        i:=high(arr)+1;
      widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
    end;
  {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies the wide character array arr into a UnicodeString. With
    zerobased set, copying stops at the first #0 in arr (or at the end of
    arr when there is none); otherwise the whole array is copied.
  }
    var
      CharCount : SizeInt;
    begin
      CharCount:=-1;
      if zerobased then
        CharCount:=IndexWord(arr,high(arr)+1,0);
      { -1: no terminator searched for, or none found }
      if CharCount=-1 then
        CharCount:=high(arr)+1;
      SetLength(fpc_WideCharArray_To_UnicodeStr,CharCount);
      Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,CharCount*sizeof(WideChar));
    end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  { due to their names, the following procedures should be in wstrings.inc,
    however, the compiler generates code using these functions on all platforms }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts the wide character array arr to the ShortString res using the
    installed Wide2AnsiMoveProc and DefaultSystemCodePage. At most
    high(res) source characters are considered; with zerobased set,
    conversion also stops at the first #0 within that range.
  }
    var
      l: longint;
      index: ptrint;
      len: byte;
      temp: ansistring;
    begin
      { clamp the number of source characters to 0..high(res) }
      l:=high(arr)+1;
      if l>=high(res)+1 then
        l:=high(res)
      else if l<0 then
        l:=0;
      len:=l;
      if zerobased then
        begin
          index:=IndexWord(arr[0],l,0);
          if index>=0 then
            len:=index;
        end;
      widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
      res:=temp;
    end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts the wide character array arr to an AnsiString in code page cp
    using the installed Wide2AnsiMoveProc.

    arr       : source characters
    cp        : target code page; CP_ACP stands for DefaultSystemCodePage,
                as in fpc_UnicodeStr_To_AnsiStr. Without FPC_HAS_CPSTRING
                there is no such parameter and DefaultSystemCodePage is
                always used.
    zerobased : when true, conversion stops at the first #0 in arr (or at
                the end of arr when there is none); when false, the whole
                array is converted
  }
    var
      i : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
      cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    begin
  {$ifndef FPC_HAS_CPSTRING}
      cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
      if (zerobased) then
        begin
          i:=IndexWord(arr,high(arr)+1,0);
          if i = -1 then
            i := high(arr)+1;
        end
      else
        i := high(arr)+1;
      { resolve the placeholder before it reaches the manager, the same way
        the other *_To_AnsiStr helpers in this file do }
      if (cp=CP_ACP) then
        cp:=DefaultSystemCodePage;
      widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),RawByteString(fpc_WideCharArray_To_AnsiStr),cp,i);
    end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies the wide character array arr into a WideString. With zerobased
    set, copying stops at the first #0 in arr (or at the end of arr when
    there is none); otherwise the whole array is copied.
  }
    var
      CharCount : SizeInt;
    begin
      CharCount:=-1;
      if zerobased then
        CharCount:=IndexWord(arr,high(arr)+1,0);
      { -1: no terminator searched for, or none found }
      if CharCount=-1 then
        CharCount:=high(arr)+1;
      SetLength(fpc_WideCharArray_To_WideStr,CharCount);
      Move(arr[0], Pointer(fpc_WideCharArray_To_WideStr)^,CharCount*sizeof(WideChar));
    end;
  {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  686. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  687. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  688. procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  689. var
  690. len: SizeInt;
  691. temp: ansistring;
  692. begin
  693. len := length(src);
  694. { make sure we don't dereference src if it can be nil (JM) }
  695. if len > 0 then
  696. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
  697. len := length(temp);
  698. if len > length(res) then
  699. len := length(res);
  700. {$push}
  701. {$r-}
  702. move(temp[1],res[0],len);
  703. fillchar(res[len],length(res)-len,0);
  704. {$pop}
  705. end;
{$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  707. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  708. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  709. procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  710. var
  711. len: SizeInt;
  712. temp: widestring;
  713. begin
  714. len := length(src);
  715. { make sure we don't dereference src if it can be nil (JM) }
  716. if len > 0 then
  717. widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),temp,len);
  718. len := length(temp);
  719. if len > length(res) then
  720. len := length(res);
  721. {$push}
  722. {$r-}
  723. move(temp[1],res[0],len*sizeof(widechar));
  724. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  725. {$pop}
  726. end;
  727. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  728. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  729. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  730. procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  731. var
  732. len: longint;
  733. temp : widestring;
  734. begin
  735. len := length(src);
  736. { make sure we don't access char 1 if length is 0 (JM) }
  737. if len > 0 then
  738. widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  739. len := length(temp);
  740. if len > length(res) then
  741. len := length(res);
  742. {$push}
  743. {$r-}
  744. move(temp[1],res[0],len*sizeof(widechar));
  745. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  746. {$pop}
  747. end;
  748. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  749. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  750. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  751. procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  752. var
  753. len: SizeInt;
  754. begin
  755. len := length(src);
  756. if len > length(res) then
  757. len := length(res);
  758. {$push}
  759. {$r-}
  760. { make sure we don't try to access element 1 of the widestring if it's nil }
  761. if len > 0 then
  762. move(src[1],res[0],len*SizeOf(WideChar));
  763. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  764. {$pop}
  765. end;
  766. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  767. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  768. {$define FPC_HAS_UNICODESTR_COMPARE}
  769. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  770. {
  771. Compares 2 UnicodeStrings;
  772. The result is
  773. <0 if S1<S2
  774. 0 if S1=S2
  775. >0 if S1>S2
  776. }
  777. Var
  778. MaxI,Temp : SizeInt;
  779. begin
  780. if pointer(S1)=pointer(S2) then
  781. begin
  782. fpc_UnicodeStr_Compare:=0;
  783. exit;
  784. end;
  785. Maxi:=Length(S1);
  786. temp:=Length(S2);
  787. If MaxI>Temp then
  788. MaxI:=Temp;
  789. Temp:=CompareWord(S1[1],S2[1],MaxI);
  790. if temp=0 then
  791. temp:=Length(S1)-Length(S2);
  792. fpc_UnicodeStr_Compare:=Temp;
  793. end;
  794. {$endif FPC_HAS_UNICODESTR_COMPARE}
  795. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  796. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  797. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  798. {
  799. Compares 2 UnicodeStrings for equality only;
  800. The result is
  801. 0 if S1=S2
  802. <>0 if S1<>S2
  803. }
  804. Var
  805. MaxI : SizeInt;
  806. begin
  807. if pointer(S1)=pointer(S2) then
  808. exit(0);
  809. Maxi:=Length(S1);
  810. If MaxI<>Length(S2) then
  811. exit(-1)
  812. else
  813. exit(CompareWord(S1[1],S2[1],MaxI));
  814. end;
  815. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  816. {$ifdef VER2_4}
  817. // obsolete but needed for bootstrapping with 2.4
  818. Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  819. begin
  820. if p=nil then
  821. HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  822. end;
  823. Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  824. begin
  825. if (index>len) or (Index<1) then
  826. HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  827. end;
  828. {$else VER2_4}
  829. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  830. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  831. Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  832. begin
  833. if (p=nil) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) or (Index<1) then
  834. HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  835. end;
  836. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  837. {$endif VER2_4}
  838. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  839. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  840. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  841. {
  842. Sets The length of string S to L.
  843. Makes sure S is unique, and contains enough room.
  844. }
  845. Var
  846. Temp : Pointer;
  847. movelen: SizeInt;
  848. lens, lena : SizeUInt;
  849. begin
  850. if (l>0) then
  851. begin
  852. if Pointer(S)=nil then
  853. begin
  854. { Need a complete new string...}
  855. Pointer(s):=NewUnicodeString(l);
  856. end
  857. else
  858. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  859. begin
  860. Temp:=Pointer(s)-UnicodeFirstOff;
  861. lens:=MemSize(Temp);
  862. lena:=SizeUInt(L*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
  863. if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
  864. begin
  865. reallocmem(Temp, lena);
  866. Pointer(S):=Temp+UnicodeFirstOff;
  867. end;
  868. end
  869. else
  870. begin
  871. { Reallocation is needed... }
  872. Temp:=NewUnicodeString(L);
  873. if Length(S)>0 then
  874. begin
  875. if l < succ(length(s)) then
  876. movelen := l
  877. { also move terminating null }
  878. else
  879. movelen := succ(length(s));
  880. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  881. end;
  882. fpc_unicodestr_decr_ref(Pointer(S));
  883. Pointer(S):=Temp;
  884. end;
  885. { Force nil termination in case it gets shorter }
  886. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  887. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  888. end
  889. else { length=0, deallocate the string }
  890. fpc_unicodestr_decr_ref (Pointer(S));
  891. end;
  892. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  893. {*****************************************************************************
  894. Public functions, In interface.
  895. *****************************************************************************}
  896. function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  897. begin
  898. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  899. end;
  900. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  901. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  902. function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  903. begin
  904. result:=StringToWideChar(Src,Dest,DestSize);
  905. end;
  906. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  907. function WideCharToString(S : PWideChar) : UnicodeString;
  908. begin
  909. result:=WideCharLenToString(s,Length(WideString(s)));
  910. end;
  911. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  912. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  913. function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  914. var
  915. temp: widestring;
  916. Len: SizeInt;
  917. begin
  918. widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  919. Len:=Length(temp);
  920. if DestSize<=Len then
  921. Len:=Destsize-1;
  922. move(temp[1],Dest^,Len*SizeOf(WideChar));
  923. Dest[Len]:=#0;
  924. result:=Dest;
  925. end;
  926. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  927. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  928. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  929. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  930. begin
  931. SetLength(result,Len);
  932. Move(S^,Pointer(Result)^,Len*2);
  933. end;
  934. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  935. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  936. begin
  937. Dest:=UnicodeCharLenToString(Src,Len);
  938. end;
  939. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  940. begin
  941. Dest:=AnsiString(UnicodeCharLenToString(Src,Len));
  942. end;
  943. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  944. begin
  945. Dest:=AnsiString(UnicodeCharToString(S));
  946. end;
  947. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  948. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  949. function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  950. begin
  951. SetLength(result,Len);
  952. Move(S^,Pointer(Result)^,Len*2);
  953. end;
  954. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  955. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  956. begin
  957. Dest:=WideCharLenToString(Src,Len);
  958. end;
  959. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  960. begin
  961. Dest:=AnsiString(WideCharLenToString(Src,Len));
  962. end;
  963. procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  964. begin
  965. Dest:=WideCharToString(S);
  966. end;
  967. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  968. begin
  969. Dest:=AnsiString(WideCharToString(S));
  970. end;
  971. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  972. {$define FPC_HAS_UNICODESTR_UNIQUE}
  973. Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  974. {
  975. Make sure reference count of S is 1,
  976. using copy-on-write semantics.
  977. }
  978. Var
  979. SNew : Pointer;
  980. L : SizeInt;
  981. begin
  982. pointer(result) := pointer(s);
  983. If Pointer(S)=Nil then
  984. exit;
  985. if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
  986. begin
  987. L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
  988. SNew:=NewUnicodeString (L);
  989. Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
  990. PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L;
  991. fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
  992. pointer(S):=SNew;
  993. pointer(result):=SNew;
  994. end;
  995. end;
  996. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  997. {$ifndef FPC_HAS_UNICODESTR_COPY}
  998. {$define FPC_HAS_UNICODESTR_COPY}
  999. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  1000. var
  1001. ResultAddress : Pointer;
  1002. begin
  1003. ResultAddress:=Nil;
  1004. dec(index);
  1005. if Index < 0 then
  1006. Index := 0;
  1007. { Check Size. Accounts for Zero-length S, the double check is needed because
  1008. Size can be maxint and will get <0 when adding index }
  1009. if (Size>Length(S)) or
  1010. (Index+Size>Length(S)) then
  1011. Size:=Length(S)-Index;
  1012. If Size>0 then
  1013. begin
  1014. ResultAddress:=NewUnicodeString(Size);
  1015. Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
  1016. PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size;
  1017. PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
  1018. end;
  1019. fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
  1020. Pointer(fpc_unicodestr_Copy):=ResultAddress;
  1021. end;
  1022. {$endif FPC_HAS_UNICODESTR_COPY}
  1023. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1024. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1025. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  1026. var
  1027. i,MaxLen : SizeInt;
  1028. pc : punicodechar;
  1029. begin
  1030. Pos:=0;
  1031. if Length(SubStr)>0 then
  1032. begin
  1033. MaxLen:=Length(source)-Length(SubStr);
  1034. i:=0;
  1035. pc:=@source[1];
  1036. while (i<=MaxLen) do
  1037. begin
  1038. inc(i);
  1039. if (SubStr[1]=pc^) and
  1040. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  1041. begin
  1042. Pos:=i;
  1043. exit;
  1044. end;
  1045. inc(pc);
  1046. end;
  1047. end;
  1048. end;
  1049. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1050. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1051. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1052. { Faster version for a unicodechar alone }
  1053. Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  1054. var
  1055. i: SizeInt;
  1056. pc : punicodechar;
  1057. begin
  1058. pc:=@s[1];
  1059. for i:=1 to length(s) do
  1060. begin
  1061. if pc^=c then
  1062. begin
  1063. pos:=i;
  1064. exit;
  1065. end;
  1066. inc(pc);
  1067. end;
  1068. pos:=0;
  1069. end;
  1070. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1071. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1072. block, which is significant bloat without any sensible speed improvement. }
  1073. Function Pos (const c : RawByteString; Const s : UnicodeString) : SizeInt;
  1074. begin
  1075. result:=Pos(UnicodeString(c),s);
  1076. end;
  1077. Function Pos (const c : ShortString; Const s : UnicodeString) : SizeInt;
  1078. begin
  1079. result:=Pos(UnicodeString(c),s);
  1080. end;
  1081. Function Pos (const c : UnicodeString; Const s : RawByteString) : SizeInt;
  1082. begin
  1083. result:=Pos(c,UnicodeString(s));
  1084. end;
  1085. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1086. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1087. { Faster version for a char alone. Must be implemented because }
  1088. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1089. { using pos(char,pchar) will always call the shortstring version }
  1090. { (exact match for first argument), also with $h+ (JM) }
  1091. Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  1092. var
  1093. i: SizeInt;
  1094. wc : unicodechar;
  1095. pc : punicodechar;
  1096. begin
  1097. wc:=c;
  1098. pc:=@s[1];
  1099. for i:=1 to length(s) do
  1100. begin
  1101. if pc^=wc then
  1102. begin
  1103. pos:=i;
  1104. exit;
  1105. end;
  1106. inc(pc);
  1107. end;
  1108. pos:=0;
  1109. end;
  1110. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1111. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1112. {$define FPC_HAS_DELETE_UNICODESTR}
  1113. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  1114. Var
  1115. LS : SizeInt;
  1116. begin
  1117. LS:=Length(S);
  1118. if (Index>LS) or (Index<=0) or (Size<=0) then
  1119. exit;
  1120. UniqueString (S);
  1121. { (Size+Index) will overflow if Size=MaxInt. }
  1122. if Size>LS-Index then
  1123. Size:=LS-Index+1;
  1124. if Size<=LS-Index then
  1125. begin
  1126. Dec(Index);
  1127. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  1128. end;
  1129. Setlength(s,LS-Size);
  1130. end;
  1131. {$endif FPC_HAS_DELETE_UNICODESTR}
  1132. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1133. {$define FPC_HAS_INSERT_UNICODESTR}
  1134. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  1135. var
  1136. Temp : UnicodeString;
  1137. LS : SizeInt;
  1138. begin
  1139. If Length(Source)=0 then
  1140. exit;
  1141. if index <= 0 then
  1142. index := 1;
  1143. Ls:=Length(S);
  1144. if index > LS then
  1145. index := LS+1;
  1146. Dec(Index);
  1147. SetLength(Temp,Length(Source)+LS);
  1148. If Index>0 then
  1149. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  1150. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  1151. If (LS-Index)>0 then
  1152. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  1153. S:=Temp;
  1154. end;
  1155. {$endif FPC_HAS_INSERT_UNICODESTR}
  1156. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1157. {$define FPC_HAS_UPCASE_UNICODECHAR}
  1158. Function UpCase(c:UnicodeChar):UnicodeChar;
  1159. var
  1160. s : UnicodeString;
  1161. begin
  1162. s:=c;
  1163. result:=widestringmanager.UpperUnicodeStringProc(s)[1];
  1164. end;
  1165. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1166. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1167. {$define FPC_HAS_UPCASE_UNICODESTR}
  1168. function UpCase(const s : UnicodeString) : UnicodeString;
  1169. begin
  1170. result:=widestringmanager.UpperUnicodeStringProc(s);
  1171. end;
  1172. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1173. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1174. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  1175. Function LowerCase(c:UnicodeChar):UnicodeChar;
  1176. var
  1177. s : UnicodeString;
  1178. begin
  1179. s:=c;
  1180. result:=widestringmanager.LowerUnicodeStringProc(s)[1];
  1181. end;
  1182. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1183. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1184. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  1185. function LowerCase(const s : UnicodeString) : UnicodeString;
  1186. begin
  1187. result:=widestringmanager.LowerUnicodeStringProc(s);
  1188. end;
  1189. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1190. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1191. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1192. Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  1193. begin
  1194. SetLength(S,Len);
  1195. If (Buf<>Nil) and (Len>0) then
  1196. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  1197. end;
  1198. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1199. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1200. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1201. Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  1202. begin
  1203. If (Buf<>Nil) and (Len>0) then
  1204. widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len)
  1205. else
  1206. SetLength(S,Len);
  1207. end;
  1208. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1209. {$ifndef FPUNONE}
  1210. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  1211. Var
  1212. SS: ShortString;
  1213. begin
  1214. fpc_Val_Real_UnicodeStr:=0;
  1215. if length(S)>255 then
  1216. code:=256
  1217. else
  1218. begin
  1219. SS:=ShortString(S);
  1220. Val(SS,fpc_Val_Real_UnicodeStr,code);
  1221. end;
  1222. end;
  1223. {$endif}
  1224. {$ifndef FPC_STR_ENUM_INTERN}
  1225. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  1226. var
  1227. ss: ShortString;
  1228. begin
  1229. if length(s)>255 then
  1230. code:=256
  1231. else
  1232. begin
  1233. ss:=ShortString(s);
  1234. val(ss,fpc_val_enum_unicodestr,code);
  1235. end;
  1236. end;
  1237. {$endif FPC_STR_ENUM_INTERN}
  1238. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  1239. Var
  1240. SS: ShortString;
  1241. begin
  1242. if length(S)>255 then
  1243. begin
  1244. fpc_Val_Currency_UnicodeStr:=0;
  1245. code:=256;
  1246. end
  1247. else
  1248. begin
  1249. SS:=ShortString(S);
  1250. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  1251. end;
  1252. end;
  1253. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  1254. Var
  1255. SS: ShortString;
  1256. begin
  1257. fpc_Val_UInt_UnicodeStr:=0;
  1258. if length(S)>255 then
  1259. code:=256
  1260. else
  1261. begin
  1262. SS:=ShortString(S);
  1263. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  1264. end;
  1265. end;
  1266. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  1267. Var
  1268. SS: ShortString;
  1269. begin
  1270. fpc_Val_SInt_UnicodeStr:=0;
  1271. if length(S)>255 then
  1272. code:=256
  1273. else
  1274. begin
  1275. SS:=ShortString(S);
  1276. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  1277. end;
  1278. end;
  1279. {$ifndef CPU64}
  1280. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1281. Var
  1282. SS: ShortString;
  1283. begin
  1284. fpc_Val_qword_UnicodeStr:=0;
  1285. if length(S)>255 then
  1286. code:=256
  1287. else
  1288. begin
  1289. SS:=ShortString(S);
  1290. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1291. end;
  1292. end;
  1293. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1294. Var
  1295. SS: ShortString;
  1296. begin
  1297. fpc_Val_int64_UnicodeStr:=0;
  1298. if length(S)>255 then
  1299. code:=256
  1300. else
  1301. begin
  1302. SS:=ShortString(S);
  1303. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1304. end;
  1305. end;
  1306. {$endif CPU64}
  1307. {$ifndef FPUNONE}
  1308. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1309. var
  1310. ss: shortstring;
  1311. begin
  1312. str_real(len,fr,d,treal_type(rt),ss);
  1313. s:=UnicodeString(ss);
  1314. end;
  1315. {$endif}
  1316. {$ifndef FPC_STR_ENUM_INTERN}
  1317. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1318. var
  1319. ss: ShortString;
  1320. begin
  1321. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1322. s:=UnicodeString(ss);
  1323. end;
  1324. {$endif FPC_STR_ENUM_INTERN}
  1325. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1326. var
  1327. ss: ShortString;
  1328. begin
  1329. fpc_shortstr_bool(b,len,ss);
  1330. s:=UnicodeString(ss);
  1331. end;
  1332. {$ifdef FPC_HAS_STR_CURRENCY}
  1333. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1334. var
  1335. ss: shortstring;
  1336. begin
  1337. str(c:len:fr,ss);
  1338. s:=UnicodeString(ss);
  1339. end;
  1340. {$endif FPC_HAS_STR_CURRENCY}
  1341. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1342. Var
  1343. SS: ShortString;
  1344. begin
  1345. Str (v:Len,SS);
  1346. S:=UnicodeString(SS);
  1347. end;
  1348. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1349. Var
  1350. SS: ShortString;
  1351. begin
  1352. str(v:Len,SS);
  1353. S:=UnicodeString(SS);
  1354. end;
  1355. {$ifndef CPU64}
  1356. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1357. Var
  1358. SS: ShortString;
  1359. begin
  1360. Str (v:Len,SS);
  1361. S:=UnicodeString(SS);
  1362. end;
  1363. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1364. Var
  1365. SS: ShortString;
  1366. begin
  1367. str(v:Len,SS);
  1368. S:=UnicodeString(SS);
  1369. end;
  1370. {$endif CPU64}
  1371. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1372. begin
  1373. if assigned(Source) then
  1374. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source))
  1375. else
  1376. Result:=0;
  1377. end;
  1378. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1379. var
  1380. i,j : SizeUInt;
  1381. lw : longword;
  1382. begin
  1383. result:=0;
  1384. if source=nil then
  1385. exit;
  1386. i:=0;
  1387. j:=0;
  1388. if assigned(Dest) then
  1389. begin
  1390. while (i<SourceChars) and (j<MaxDestBytes) do
  1391. begin
  1392. lw:=ord(Source[i]);
  1393. case lw of
  1394. 0..$7f:
  1395. begin
  1396. Dest[j]:=char(lw);
  1397. inc(j);
  1398. end;
  1399. $80..$7ff:
  1400. begin
  1401. if j+1>=MaxDestBytes then
  1402. break;
  1403. Dest[j]:=char($c0 or (lw shr 6));
  1404. Dest[j+1]:=char($80 or (lw and $3f));
  1405. inc(j,2);
  1406. end;
  1407. $800..$d7ff,$e000..$ffff:
  1408. begin
  1409. if j+2>=MaxDestBytes then
  1410. break;
  1411. Dest[j]:=char($e0 or (lw shr 12));
  1412. Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
  1413. Dest[j+2]:=char($80 or (lw and $3f));
  1414. inc(j,3);
  1415. end;
  1416. $d800..$dbff:
  1417. {High Surrogates}
  1418. begin
  1419. if j+3>=MaxDestBytes then
  1420. break;
  1421. if (i+1<sourcechars) and
  1422. (word(Source[i+1]) >= $dc00) and
  1423. (word(Source[i+1]) <= $dfff) then
  1424. begin
  1425. { $d7c0 is ($d800 - ($10000 shr 10)) }
  1426. lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
  1427. Dest[j]:=char($f0 or (lw shr 18));
  1428. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1429. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1430. Dest[j+3]:=char($80 or (lw and $3f));
  1431. inc(j,4);
  1432. inc(i);
  1433. end;
  1434. end;
  1435. end;
  1436. inc(i);
  1437. end;
  1438. if j>SizeUInt(MaxDestBytes-1) then
  1439. j:=MaxDestBytes-1;
  1440. Dest[j]:=#0;
  1441. end
  1442. else
  1443. begin
  1444. while i<SourceChars do
  1445. begin
  1446. case word(Source[i]) of
  1447. $0..$7f:
  1448. inc(j);
  1449. $80..$7ff:
  1450. inc(j,2);
  1451. $800..$d7ff,$e000..$ffff:
  1452. inc(j,3);
  1453. $d800..$dbff:
  1454. begin
  1455. if (i+1<sourcechars) and
  1456. (word(Source[i+1]) >= $dc00) and
  1457. (word(Source[i+1]) <= $dfff) then
  1458. begin
  1459. inc(j,4);
  1460. inc(i);
  1461. end;
  1462. end;
  1463. end;
  1464. inc(i);
  1465. end;
  1466. end;
  1467. result:=j+1;
  1468. end;
  1469. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1470. begin
  1471. if assigned(Source) then
  1472. Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source))
  1473. else
  1474. Result:=0;
  1475. end;
  1476. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1477. const
  1478. UNICODE_INVALID=63;
  1479. var
  1480. InputUTF8: SizeUInt;
  1481. IBYTE: BYTE;
  1482. OutputUnicode: SizeUInt;
  1483. PRECHAR: SizeUInt;
  1484. TempBYTE: BYTE;
  1485. CharLen: SizeUint;
  1486. LookAhead: SizeUInt;
  1487. UC: SizeUInt;
  1488. begin
  1489. if not assigned(Source) then
  1490. begin
  1491. result:=0;
  1492. exit;
  1493. end;
  1494. result:=SizeUInt(-1);
  1495. InputUTF8:=0;
  1496. OutputUnicode:=0;
  1497. PreChar:=0;
  1498. if Assigned(Dest) Then
  1499. begin
  1500. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1501. begin
  1502. IBYTE:=byte(Source[InputUTF8]);
  1503. if (IBYTE and $80) = 0 then
  1504. begin
  1505. //One character US-ASCII, convert it to unicode
  1506. if IBYTE = 10 then
  1507. begin
  1508. If (PreChar<>13) and FALSE then
  1509. begin
  1510. //Expand to crlf, conform UTF-8.
  1511. //This procedure will break the memory alocation by
  1512. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1513. if OutputUnicode+1<MaxDestChars then
  1514. begin
  1515. Dest[OutputUnicode]:=WideChar(13);
  1516. inc(OutputUnicode);
  1517. Dest[OutputUnicode]:=WideChar(10);
  1518. inc(OutputUnicode);
  1519. PreChar:=10;
  1520. end
  1521. else
  1522. begin
  1523. Dest[OutputUnicode]:=WideChar(13);
  1524. inc(OutputUnicode);
  1525. end;
  1526. end
  1527. else
  1528. begin
  1529. Dest[OutputUnicode]:=WideChar(IBYTE);
  1530. inc(OutputUnicode);
  1531. PreChar:=IBYTE;
  1532. end;
  1533. end
  1534. else
  1535. begin
  1536. Dest[OutputUnicode]:=WideChar(IBYTE);
  1537. inc(OutputUnicode);
  1538. PreChar:=IBYTE;
  1539. end;
  1540. inc(InputUTF8);
  1541. end
  1542. else
  1543. begin
  1544. TempByte:=IBYTE;
  1545. CharLen:=0;
  1546. while (TempBYTE and $80)<>0 do
  1547. begin
  1548. TempBYTE:=(TempBYTE shl 1) and $FE;
  1549. inc(CharLen);
  1550. end;
  1551. //Test for the "CharLen" conforms UTF-8 string
  1552. //This means the 10xxxxxx pattern.
  1553. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1554. begin
  1555. //Insuficient chars in string to decode
  1556. //UTF-8 array. Fallback to single char.
  1557. CharLen:= 1;
  1558. end;
  1559. for LookAhead := 1 to CharLen-1 do
  1560. begin
  1561. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1562. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1563. begin
  1564. //Invalid UTF-8 sequence, fallback.
  1565. CharLen:= LookAhead;
  1566. break;
  1567. end;
  1568. end;
  1569. UC:=$FFFF;
  1570. case CharLen of
  1571. 1: begin
  1572. //Not valid UTF-8 sequence
  1573. UC:=UNICODE_INVALID;
  1574. end;
  1575. 2: begin
  1576. //Two bytes UTF, convert it
  1577. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1578. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1579. if UC <= $7F then
  1580. begin
  1581. //Invalid UTF sequence.
  1582. UC:=UNICODE_INVALID;
  1583. end;
  1584. end;
  1585. 3: begin
  1586. //Three bytes, convert it to unicode
  1587. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1588. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1589. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1590. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1591. begin
  1592. //Invalid UTF-8 sequence
  1593. UC:= UNICODE_INVALID;
  1594. End;
  1595. end;
  1596. 4: begin
  1597. //Four bytes, convert it to two unicode characters
  1598. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1599. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1600. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1601. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1602. if (UC < $10000) or (UC > $10FFFF) then
  1603. begin
  1604. UC:= UNICODE_INVALID;
  1605. end
  1606. else
  1607. begin
  1608. { only store pair if room }
  1609. dec(UC,$10000);
  1610. if (OutputUnicode<MaxDestChars-1) then
  1611. begin
  1612. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1613. inc(OutputUnicode);
  1614. UC:=(UC and $3ff) + $DC00;
  1615. end
  1616. else
  1617. begin
  1618. InputUTF8:= InputUTF8 + CharLen;
  1619. { don't store anything }
  1620. CharLen:=0;
  1621. end;
  1622. end;
  1623. end;
  1624. 5,6,7: begin
  1625. //Invalid UTF8 to unicode conversion,
  1626. //mask it as invalid UNICODE too.
  1627. UC:=UNICODE_INVALID;
  1628. end;
  1629. end;
  1630. if CharLen > 0 then
  1631. begin
  1632. PreChar:=UC;
  1633. Dest[OutputUnicode]:=WideChar(UC);
  1634. inc(OutputUnicode);
  1635. end;
  1636. InputUTF8:= InputUTF8 + CharLen;
  1637. end;
  1638. end;
  1639. Result:=OutputUnicode+1;
  1640. end
  1641. else
  1642. begin
  1643. while (InputUTF8<SourceBytes) do
  1644. begin
  1645. IBYTE:=byte(Source[InputUTF8]);
  1646. if (IBYTE and $80) = 0 then
  1647. begin
  1648. //One character US-ASCII, convert it to unicode
  1649. if IBYTE = 10 then
  1650. begin
  1651. if (PreChar<>13) and FALSE then
  1652. begin
  1653. //Expand to crlf, conform UTF-8.
  1654. //This procedure will break the memory alocation by
  1655. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1656. inc(OutputUnicode,2);
  1657. PreChar:=10;
  1658. end
  1659. else
  1660. begin
  1661. inc(OutputUnicode);
  1662. PreChar:=IBYTE;
  1663. end;
  1664. end
  1665. else
  1666. begin
  1667. inc(OutputUnicode);
  1668. PreChar:=IBYTE;
  1669. end;
  1670. inc(InputUTF8);
  1671. end
  1672. else
  1673. begin
  1674. TempByte:=IBYTE;
  1675. CharLen:=0;
  1676. while (TempBYTE and $80)<>0 do
  1677. begin
  1678. TempBYTE:=(TempBYTE shl 1) and $FE;
  1679. inc(CharLen);
  1680. end;
  1681. //Test for the "CharLen" conforms UTF-8 string
  1682. //This means the 10xxxxxx pattern.
  1683. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1684. begin
  1685. //Insuficient chars in string to decode
  1686. //UTF-8 array. Fallback to single char.
  1687. CharLen:= 1;
  1688. end;
  1689. for LookAhead := 1 to CharLen-1 do
  1690. begin
  1691. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1692. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1693. begin
  1694. //Invalid UTF-8 sequence, fallback.
  1695. CharLen:= LookAhead;
  1696. break;
  1697. end;
  1698. end;
  1699. UC:=$FFFF;
  1700. case CharLen of
  1701. 1: begin
  1702. //Not valid UTF-8 sequence
  1703. UC:=UNICODE_INVALID;
  1704. end;
  1705. 2: begin
  1706. //Two bytes UTF, convert it
  1707. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1708. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1709. if UC <= $7F then
  1710. begin
  1711. //Invalid UTF sequence.
  1712. UC:=UNICODE_INVALID;
  1713. end;
  1714. end;
  1715. 3: begin
  1716. //Three bytes, convert it to unicode
  1717. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1718. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1719. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1720. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1721. begin
  1722. //Invalid UTF-8 sequence
  1723. UC:= UNICODE_INVALID;
  1724. end;
  1725. end;
  1726. 4: begin
  1727. //Four bytes, convert it to two unicode characters
  1728. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1729. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1730. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1731. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1732. if (UC < $10000) or (UC > $10FFFF) then
  1733. UC:= UNICODE_INVALID
  1734. else
  1735. { extra character character }
  1736. inc(OutputUnicode);
  1737. end;
  1738. 5,6,7: begin
  1739. //Invalid UTF8 to unicode conversion,
  1740. //mask it as invalid UNICODE too.
  1741. UC:=UNICODE_INVALID;
  1742. end;
  1743. end;
  1744. if CharLen > 0 then
  1745. begin
  1746. PreChar:=UC;
  1747. inc(OutputUnicode);
  1748. end;
  1749. InputUTF8:= InputUTF8 + CharLen;
  1750. end;
  1751. end;
  1752. Result:=OutputUnicode+1;
  1753. end;
  1754. end;
  { UTF-8 encodes an ansi/raw byte string: the input is first converted to a
    UnicodeString and then handed to the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
    var
      wide : UnicodeString;
    begin
      wide:=UnicodeString(s);
      Result:=UTF8Encode(wide);
    end;
  1759. {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1760. {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  { Converts a UTF-16 string to UTF-8.
    Returns an empty string when s is empty or when UnicodeToUtf8 reports
    that nothing was produced (result <= 0). }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
    var
      written : SizeInt;
      buf : UTF8String;
    begin
      result:='';
      if length(s)=0 then
        exit;
      { reserve three bytes per UTF-16 code unit before converting }
      SetLength(buf,3*length(s));
      written:=UnicodeToUtf8(pchar(buf),length(buf)+1,PUnicodeChar(s),length(s));
      if written<=0 then
        exit;
      { the value returned by UnicodeToUtf8 is one larger than the payload size }
      SetLength(buf,written-1);
      result:=buf;
    end;
  1777. {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1778. {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  1779. {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Converts a UTF-8 encoded byte string to a UTF-16 string.
    Returns an empty string when s is empty or when Utf8ToUnicode reports
    that nothing was produced (result <= 0). }
  function UTF8Decode(const s : RawByteString): UnicodeString;
    var
      written : SizeInt;
      buf : UnicodeString;
    begin
      result:='';
      if length(s)=0 then
        exit;
      { reserve one UTF-16 code unit per input byte before converting }
      SetLength(buf,length(s));
      written:=Utf8ToUnicode(PUnicodeChar(buf),length(buf)+1,pchar(s),length(s));
      if written<=0 then
        exit;
      { the value returned by Utf8ToUnicode is one larger than the payload size }
      SetLength(buf,written-1);
      result:=buf;
    end;
  1796. {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Thin wrapper: returns Utf8Encode(s). }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      AnsiToUtf8:=Utf8Encode(s);
    end;
  { Thin wrapper: decodes s with Utf8Decode and casts the UTF-16 result back
    to a RawByteString. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Utf8ToAnsi:=RawByteString(Utf8Decode(s));
    end;
  { Converts len UTF-16 code units starting at p into the UCS-4 string res.
    A high surrogate ($d800..$dbff) immediately followed by a low surrogate
    ($dc00..$dfff) is combined into one code point; every other code unit,
    including an unpaired surrogate, is copied through unchanged.
    res is sized to the number of code points plus one and its last element
    is set to 0. }
  procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);

    { True when a complete surrogate pair starts at index idx. }
    function PairStartsAt(idx: sizeint): boolean;
      begin
        PairStartsAt:=(p[idx]>=#$d800) and
                      (p[idx]<=#$dbff) and
                      (idx+1<len) and
                      (p[idx+1]>=#$dc00) and
                      (p[idx+1]<=#$dfff);
      end;

    var
      i, reslen: sizeint;
      w: longint;
    begin
      { first pass: count the resulting code points }
      reslen:=0;
      i:=0;
      while (i<len) do
        begin
          if PairStartsAt(i) then
            inc(i,2)
          else
            inc(i);
          inc(reslen);
        end;
      SetLength(res,reslen+1); { +1 for null termination }

      { second pass: emit the code points }
      reslen:=0;
      i:=0;
      while (i<len) do
        begin
          w:=ord(p[i]);
          if PairStartsAt(i) then
            begin
              { (w-$d7c0) shl 10 equals (w-$d800) shl 10 + $10000 }
              res[reslen]:=(UCS4Char(w-$d7c0) shl 10)+(UCS4Char(p[i+1]) xor $dc00);
              inc(i,2);
            end
          else
            begin
              { plain BMP value or unpaired surrogate: stored as is }
              res[reslen]:=w;
              inc(i);
            end;
          inc(reslen);
        end;
      res[reslen]:=0;
    end;
  1850. {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1851. {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  { Returns the UCS-4 form of s as produced by UCS4Encode. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
    var
      units : SizeInt;
    begin
      units:=Length(s);
      UCS4Encode(PWideChar(s),units,result);
    end;
  1856. {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1857. {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  1858. {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  { Returns the UCS-4 form of s as produced by UCS4Encode. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
    var
      units : SizeInt;
    begin
      units:=Length(s);
      UCS4Encode(PWideChar(s),units,result);
    end;
  1863. {$endif FPC_HAS_WIDESTR_TO_UCS4STRING}
  1864. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1865. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  { Writes the UTF-16 form of the UCS-4 string s through dest.

    dest should point to a previously allocated wide/unicodestring. The last
    element of s is treated as the explicit terminating #0 and is skipped.
    For every other element:
      - values up to and including $ffff are written as one code unit;
      - values in $10000..$10ffff are written as a surrogate pair (two units);
      - anything larger is an invalid code point and is written as '?'.
    No terminator is written by this routine.

    Fix: the single-unit test used to be "nc<$ffff", which pushed the value
    $ffff into the surrogate branch and emitted two units ($d7ff,$dfff).
    The sizing loops in UCS4StringToUnicodeString/UCS4StringToWideString
    reserve only one unit for $ffff, so that produced wrong output and a
    write past the reserved length. }
  procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
    var
      i: sizeint;
      nc: UCS4Char;
    begin
      for i:=0 to length(s)-2 do  { -2 because s contains explicit terminating #0 }
        begin
          nc:=s[i];
          if (nc<=$ffff) then
            { fits in a single UTF-16 code unit }
            dest^:=widechar(nc)
          else if (dword(nc)<=$10ffff) then
            begin
              { nc shr 10 + $d7c0 equals ((nc-$10000) shr 10) + $d800 }
              dest^:=widechar(nc shr 10 + $d7c0);
              { subtracting $10000 doesn't change low 10 bits }
              dest[1]:=widechar(nc and $3ff + $dc00);
              inc(dest);
            end
          else  { invalid code point }
            dest^:='?';
          inc(dest);
        end;
    end;
  { Converts a UCS-4 string (whose last element is the terminating #0) to a
    UnicodeString: the required number of UTF-16 code units is counted first,
    then UCS4Decode fills the result. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      for idx:=0 to High(s)-1 do  { skip terminating #0 }
        if (s[idx]>$ffff) and (s[idx]<=$10ffff) then
          Inc(units,2)   { counted as a surrogate pair }
        else
          Inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  { Converts a UCS-4 string (whose last element is the terminating #0) to a
    WideString: the required number of UTF-16 code units is counted first,
    then UCS4Decode fills the result. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
    var
      idx : SizeInt;
      units : SizeInt;
    begin
      units:=0;
      for idx:=0 to High(s)-1 do  { skip terminating #0 }
        if (s[idx]>$ffff) and (s[idx]<=$10ffff) then
          Inc(units,2)   { counted as a surrogate pair }
        else
          Inc(units);
      SetLength(result,units);
      UCS4Decode(s,pointer(result));
    end;
  1911. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1912. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  const
    { messages written to StdErr by unimplementedunicodestring }
    SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';

  { Called by the placeholder unicode string routines below: when console I/O
    is compiled in and IsConsole is set, prints both messages to StdErr; in
    every case it then raises error 233 through HandleErrorAddrFrameInd with
    the current program counter and frame. }
  procedure unimplementedunicodestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      if IsConsole then
        begin
          Writeln(StdErr,SNoUnicodestrings);
          Writeln(StdErr,SRecompileWithUnicodestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorAddrFrameInd(233,get_pc_addr,get_frame);
    end;
  { Returns the ElementSize field of the string's header record (located
    UnicodeFirstOff bytes before the character data), or SizeOf(UnicodeChar)
    when S is an empty (nil) string. }
  function StringElementSize(const S: UnicodeString): Word; overload;
    begin
      if Pointer(S)=nil then
        Result:=SizeOf(UnicodeChar)
      else
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
    end;
  { Returns the Ref field of the string's header record (located
    UnicodeFirstOff bytes before the character data), or 0 when S is an
    empty (nil) string. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
    begin
      if Pointer(S)=nil then
        Result:=0
      else
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
    end;
  { Returns DefaultUnicodeCodePage, except that with FPC_HAS_CPSTRING defined
    a non-empty string reports the CodePage field of its header record. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
    begin
      Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
    end;
  1950. {$warnings off}
  { Placeholder installed by initunicodestringmanager for the upper/lower case
    hooks: it only reports the missing unicode string support and assigns no
    result. }
  function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring;
    end;
  { Placeholder installed by initunicodestringmanager for the string compare
    hooks: it only reports the missing unicode string support and assigns no
    result. }
  function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
    begin
      unimplementedunicodestring;
    end;
  { Placeholder installed by initunicodestringmanager for the text compare
    hooks: it only reports the missing unicode string support and assigns no
    result. }
  function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
    begin
      unimplementedunicodestring;
    end;
  1963. {$warnings on}
  { Fills the widestringmanager hooks with the built-in defaults: the default
    ansi<->unicode move routines, and the placeholder case/compare routines
    that report missing unicode string support. When widestring equals
    unicodestring the corresponding Wide* hooks and the char/code point length
    hooks are filled in as well. }
  procedure initunicodestringmanager;
    begin
      with widestringmanager do
        begin
  {$ifndef HAS_WIDESTRINGMANAGER}
          Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
          Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
          UpperUnicodeStringProc:=@GenericUnicodeCase;
          LowerUnicodeStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
          CompareUnicodeStringProc:=@CompareUnicodeString;
          CompareTextUnicodeStringProc:=@CompareTextUnicodeString;

  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  {$ifndef HAS_WIDESTRINGMANAGER}
          Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
          Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
          UpperWideStringProc:=@GenericUnicodeCase;
          LowerWideStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
          CompareWideStringProc:=@CompareUnicodeString;
          CompareTextWideStringProc:=@CompareTextUnicodeString;
          CharLengthPCharProc:=@DefaultCharLengthPChar;
          CodePointLengthProc:=@DefaultCodePointLength;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}

          GetStandardCodePageProc:=@DefaultGetStandardCodePage;
        end;
    end;
  1988. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}