ustrings.inc 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  @-2*SizeOf(SizeInt) : SizeInt for reference count (TUnicodeRec.Ref);
  @-SizeOf(SizeInt) : SizeInt for size (TUnicodeRec.Len); size=number of chars. Multiply with
  sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  @ : String + Terminating #0;
  22. Punicodechar(Unicodestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  type
    PUnicodeRec = ^TUnicodeRec;
    { Header record that precedes the character data of a UnicodeString.
      The string pointer itself points at the First field. }
    TUnicodeRec = packed record
      CodePage    : TSystemCodePage;  { code page tag; NewUnicodeString stores DefaultUnicodeCodePage }
      ElementSize : Word;             { NewUnicodeString stores SizeOf(UnicodeChar) }
  {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
  {$endif CPU64}
      Ref         : SizeInt;          { reference count; negative values are treated as constants }
      Len         : SizeInt;          { length in characters }
      First       : UnicodeChar;      { first character of the string data }
    end;

  const
    { Size of the complete header record, First character included. }
    UnicodeRecLen   = SizeOf(TUnicodeRec);
    { Byte distance between the start of the record and its First field. }
    UnicodeFirstOff = SizeOf(TUnicodeRec)-SizeOf(UnicodeChar);
  {
  Default UnicodeChar <-> Char conversion: UnicodeChar -> Char only converts
  code units below 256 (copied unchanged), all others are translated to '?';
  Char -> UnicodeChar copies every byte value unchanged.
  These routines can be overwritten for the Current Locale
  }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UnicodeChar -> AnsiChar conversion.
    dest is resized to len characters; every code unit below 256 is copied
    as a single byte, everything else becomes '?'. The cp argument is not
    consulted by this default implementation.
  }
  var
    remaining : SizeInt;
    outp      : PAnsiChar;
  begin
    SetLength(dest,len);
    { SetLength guarantees that dest is unique, so writing through the
      raw pointer is safe }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        if word(source^)>=256 then
          outp^:='?'
        else
          outp^:=char(word(source^));
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback AnsiChar -> UnicodeChar conversion.
    dest is resized to len characters and every source byte is widened
    to the UnicodeChar with the same ordinal value. The cp argument is not
    consulted by this default implementation.
  }
  var
    remaining : SizeInt;
    outp      : PUnicodeChar;
  begin
    SetLength(dest,len);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=unicodechar(byte(source^));
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  { Default character count of a zero terminated string: simply length(Str). }
  begin
    Result:=length(Str);
  end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  { Default code point length: 1 unless Str starts with the terminating #0,
    in which case 0 is returned. MaxLookAead is not used here. }
  begin
    Result:=ord(Str[0]<>#0);
  end;
  procedure GetUnicodeStringManager (var Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  procedure SetUnicodeStringManager (const New : TUnicodeStringManager; var Old: TUnicodeStringManager);
  { Installs New as the active string manager; the manager that was active
    before the call is handed back in Old. }
  begin
    { save first, then replace }
    Old:=widestringmanager;
    widestringmanager:=New;
  end;
  procedure SetUnicodeStringManager (const New : TUnicodeStringManager);
  { Installs New as the active string manager without returning the old one. }
  begin
    widestringmanager:=New;
  end;
  procedure GetWideStringManager (var Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager
    (reads the same widestringmanager variable as GetUnicodeStringManager). }
  begin
    Manager:=widestringmanager;
  end;
  procedure SetWideStringManager (const New : TUnicodeStringManager; var Old: TUnicodeStringManager);
  { Installs New as the active string manager; the previously active
    manager is handed back in Old. }
  begin
    { save first, then replace }
    Old:=widestringmanager;
    widestringmanager:=New;
  end;
  procedure SetWideStringManager (const New : TUnicodeStringManager);
  { Installs New as the active string manager without returning the old one. }
  begin
    widestringmanager:=New;
  end;
  116. {****************************************************************************
  117. Internal functions, not in interface.
  118. ****************************************************************************}
  procedure UnicodeStringError;
  { Reports error code 204 through HandleErrorFrame, passing the frame of
    this procedure. Called when a UnicodeString allocation fails. }
  begin
    HandleErrorFrame(204,get_frame);
  end;
  {$ifdef UnicodeStrDebug}
  procedure DumpUnicodeRec(S : Pointer);
  { Debug helper: prints the Len and Ref header fields of the UnicodeString
    whose data pointer is S, or a notice when S is nil. }
  var
    hdr : PUnicodeRec;
  begin
    if S=nil then
      begin
        Writeln ('String is nil');
        exit;
      end;
    { step back from the character data to the header record }
    hdr:=PUnicodeRec(S-UnicodeFirstOff);
    Write ('(Len:',hdr^.Len);
    Writeln (' Ref: ',hdr^.Ref,')');
  end;
  {$endif}
  function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString on the heap with room for Len characters
    plus the header record. The header is initialised with length Len,
    reference count 1, DefaultUnicodeCodePage and SizeOf(UnicodeChar);
    the first character is set to #0.
    Returns a pointer to the character data (not to the header). If the
    allocation yields nil, UnicodeStringError is called and nil is returned.
  }
  var
    raw : Pointer;
    hdr : PUnicodeRec;
  begin
    GetMem(raw,Len*SizeOf(UnicodeChar)+UnicodeRecLen);
    if raw=nil then
      begin
        UnicodeStringError;
        NewUnicodeString:=nil;
        exit;
      end;
    hdr:=PUnicodeRec(raw);
    hdr^.Len:=Len;                           { initial length }
    hdr^.Ref:=1;                             { initial reference count }
    hdr^.CodePage:=DefaultUnicodeCodePage;
    hdr^.ElementSize:=SizeOf(UnicodeChar);
    hdr^.First:=#0;                          { zero at the start of the data }
    { the caller gets the address of the character data }
    NewUnicodeString:=raw+UnicodeFirstOff;
  end;
  procedure fpc_UnicodeStr_Decr_Ref (var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Decreases the reference count of a non constant unicodestring.
    Nil strings and strings with a negative reference count (constants)
    are left untouched. When the count drops to zero the memory is freed
    and S is set to nil; otherwise S keeps its value.
  }
  var
    hdr : PUnicodeRec;
  begin
    if S<>nil then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { negative count marks a constant string }
        if hdr^.Ref>=0 then
          begin
            { declocked does a MT safe dec and returns true, if the counter is 0 }
            if declocked(hdr^.Ref) then
              begin
                FreeMem(hdr);
                S:=nil;
              end;
          end;
      end;
  end;

  { alias for internal use }
  procedure fpc_UnicodeStr_Decr_Ref (var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Increases the reference count of the unicodestring whose data pointer
    is S. Nil strings and strings with a negative reference count
    (constants) are left untouched.
  }
  var
    hdr : PUnicodeRec;
  begin
    if S<>nil then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { constant strings keep their negative count }
        if hdr^.Ref>=0 then
          inclocked(hdr^.Ref);
      end;
  end;

  { alias for internal use }
  procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
  {
    Converts a UnicodeString to a ShortString.
    high_of_res : maximum length of the result; at most that many source
                  characters are handed to the converter.
    S2          : source string; an empty source yields ''.
    The conversion goes through the installed Unicode2AnsiMoveProc using
    DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
    temp : ansistring;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        if Size>high_of_res then
          Size:=high_of_res;
        { the move procedure takes the target code page as third argument,
          exactly as in the FPC_STRTOSHORTSTRINGPROC variant below }
        widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString.
    res : receives the converted text; at most high(res) source characters
          are handed to the converter.
    S2  : source string; an empty source yields ''.
    The conversion goes through the installed Unicode2AnsiMoveProc using
    DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
    temp : ansistring;
  begin
    res:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        if Size>high(res) then
          Size:=high(res);
        widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
        res:=temp;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  function fpc_ShortStr_To_UnicodeStr (const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString.
    S2 : source string, interpreted in DefaultSystemCodePage; an empty
         source yields ''.
    The conversion goes through the installed Ansi2UnicodeMoveProc.
  }
  var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
        { Terminating Zero.
          Size is the number of source bytes; an installed converter may
          return fewer (or more) characters than that, so the terminator is
          placed behind the actual converted length instead of at Size,
          which could lie outside the allocated buffer. }
        if Pointer(result)<>nil then
          PUnicodeChar(Pointer(result)+Length(result)*sizeof(UnicodeChar))^:=#0;
      end;
  end;
  function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString through the installed
    Unicode2AnsiMoveProc. A code page of 0 or $ffff is replaced by
    DefaultSystemCodePage; without FPC_HAS_CPSTRING the default code page
    is always used. An empty source yields ''.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if (cp=0) or (cp=$ffff) then
      cp:=DefaultSystemCodePage;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,CharCount);
  end;
  function fpc_AnsiStr_To_UnicodeStr (const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString through the installed
    Ansi2UnicodeMoveProc. The source code page is taken from
    StringCodePage(S2); 0 and $ffff are replaced by DefaultSystemCodePage.
    An empty source yields ''.
  }
  var
    ByteCount : SizeInt;
    SrcCP     : TSystemCodePage;
  begin
    result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    SrcCP:=StringCodePage(S2);
    if (SrcCP=0) or (SrcCP=$ffff) then
      SrcCP:=DefaultSystemCodePage;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SrcCP,result,ByteCount);
  end;
  function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of a UnicodeString into a new WideString of the
    same length. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  function fpc_WideStr_To_UnicodeStr (const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of a WideString into a new UnicodeString of the
    same length. }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to an AnsiString through
    the installed Unicode2AnsiMoveProc. The length is found by searching
    for the first zero word. A nil pointer or an empty buffer yields ''.
    Without FPC_HAS_CPSTRING, DefaultSystemCodePage is used.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, -1, 0);
        if CharCount>0 then
          widestringmanager.Unicode2AnsiMoveProc(P,result,cp,CharCount);
      end;
  end;
  function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  {
    Copies a zero terminated UnicodeChar buffer into a new UnicodeString.
    The length is found by searching for the first zero word; a nil
    pointer yields ''.
  }
  var
    CharCount : SizeInt;
    ByteCount : SizeInt;
  begin
    result:='';
    if p=nil then
      exit;
    CharCount:=IndexWord(p^, -1, 0);
    Setlength(result,CharCount);
    if CharCount>0 then
      begin
        ByteCount:=CharCount*sizeof(UnicodeChar);
        Move(p^,PUnicodeChar(Pointer(result))^,ByteCount);
        { Terminating Zero }
        PUnicodeChar(Pointer(result)+ByteCount)^:=#0;
      end;
  end;
  function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Copies a zero terminated WideChar buffer into a new UnicodeString.
    The length is found by searching for the first zero word; a nil
    pointer yields ''.
  }
  var
    CharCount : SizeInt;
    ByteCount : SizeInt;
  begin
    result:='';
    if p=nil then
      exit;
    CharCount:=IndexWord(p^, -1, 0);
    Setlength(result,CharCount);
    if CharCount>0 then
      begin
        ByteCount:=CharCount*sizeof(UnicodeChar);
        Move(p^,PUnicodeChar(Pointer(result))^,ByteCount);
        { Terminating Zero }
        PUnicodeChar(Pointer(result)+ByteCount)^:=#0;
      end;
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to a ShortString.
    The length is found by searching for the first zero word; a nil
    pointer or an empty buffer yields ''. The conversion goes through the
    installed Unicode2AnsiMoveProc using DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    result:='';
    if p=nil then
      exit;
    Size := IndexWord(p^, $7fffffff, 0);
    if Size>0 then
      begin
        { the move procedure takes the target code page as third argument,
          exactly as in the FPC_STRTOSHORTSTRINGPROC variant below }
        widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to a ShortString.
    res receives the converted text; a nil pointer or an empty buffer
    yields ''. The conversion goes through the installed
    Unicode2AnsiMoveProc using DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    res:='';
    if p=nil then
      exit;
    Size:=IndexWord(p^, high(PtrInt), 0);
    if Size>0 then
      begin
        widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        res:=temp;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero terminated WideChar buffer to an AnsiString through the
    installed Wide2AnsiMoveProc. The length is found by searching for the
    first zero word. A nil pointer or an empty buffer yields ''.
    Without FPC_HAS_CPSTRING, DefaultSystemCodePage is used.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, -1, 0);
        if CharCount>0 then
          widestringmanager.Wide2AnsiMoveProc(P,result,cp,CharCount);
      end;
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_PWideChar_To_ShortStr(const p : pwidechar): shortstring; compilerproc;
  {
    Converts a zero terminated WideChar buffer to a ShortString.
    The length is found by searching for the first zero word; a nil
    pointer or an empty buffer yields ''. The conversion goes through the
    installed Wide2AnsiMoveProc using DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    result:='';
    if p=nil then
      exit;
    Size := IndexWord(p^, $7fffffff, 0);
    if Size>0 then
      begin
        { the move procedure takes the target code page as third argument,
          exactly as in the FPC_STRTOSHORTSTRINGPROC variant below }
        widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts a zero terminated WideChar buffer to a ShortString.
    res receives the converted text; a nil pointer or an empty buffer
    yields ''. The conversion goes through the installed
    Wide2AnsiMoveProc using DefaultSystemCodePage.
  }
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    res:='';
    if p=nil then
      exit;
    Size:=IndexWord(p^, high(PtrInt), 0);
    if Size>0 then
      begin
        widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        res:=temp;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  432. { checked against the ansistring routine, 2001-05-27 (FK) }
  procedure fpc_UnicodeStr_Assign (var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2), taking in account reference counts:
    the count of S2 is raised first (only when it is positive), then the
    old S1 is released, and finally the pointer is copied.
  }
  var
    src : PUnicodeRec;
  begin
    if S2<>nil then
      begin
        src:=PUnicodeRec(S2-UnicodeFirstOff);
        if src^.Ref>0 then
          inclocked(src^.Ref);
      end;
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    S1:=S2;
  end;

  { alias for internal use }
  procedure fpc_UnicodeStr_Assign (var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  447. {$ifndef STR_CONCAT_PROCS}
  function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  {
    Returns S1 followed by S2. When one operand is empty the other one is
    assigned directly; otherwise a new buffer of the combined length is
    filled with both parts.
  }
  var
    LeftLen,RightLen : SizeInt;
    dst : punicodechar;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        result:=s2;
        exit;
      end;
    if (S2='') then
      begin
        result:=s1;
        exit;
      end;
    LeftLen:=Length(S1);
    RightLen:=Length(S2);
    SetLength(result,RightLen+LeftLen);
    dst:=punicodechar(result);
    Move(S1[1],dst^,LeftLen*sizeof(UnicodeChar));
    inc(dst,LeftLen);
    { RightLen+1 also copies the terminating zero of S2 }
    Move(S2[1],dst^,(RightLen+1)*sizeof(UnicodeChar));
  end;
  function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  {
    Returns the concatenation of all strings in sarr. The total length is
    computed first so that the result is allocated with a single
    SetLength call; empty (nil) elements are skipped while copying.
  }
  var
    idx : Longint;
    src : pointer;
    dst : punicodechar;
    PartLen,TotalLen : SizeInt;
  begin
    { First calculate size of the result so we can do
      a single call to SetLength() }
    TotalLen:=0;
    for idx:=low(sarr) to high(sarr) do
      inc(TotalLen,length(sarr[idx]));
    SetLength(result,TotalLen);
    dst:=punicodechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        if not assigned(src) then
          continue;
        PartLen:=length(unicodestring(src));
        { PartLen+1 also copies the terminating zero of the part }
        Move(punicodechar(src)^,dst^,(PartLen+1)*sizeof(UnicodeChar));
        inc(dst,PartLen);
      end;
  end;
  497. {$else STR_CONCAT_PROCS}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1 followed by S2 in DestS. DestS may be the same string as S1
    and/or S2; those cases are detected by comparing the data pointers and
    handled by growing DestS in place.
  }
  var
    LeftLen,RightLen : SizeInt;
    BothSame : boolean;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    LeftLen:=Length(S1);
    RightLen:=length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS already holds the left part: grow it and append }
        BothSame:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,RightLen+LeftLen);
        if BothSame then
          { the right part is DestS itself; only the characters are copied,
            without the terminating zero }
          Move(Pointer(DestS)^,(Pointer(DestS)+LeftLen*sizeof(UnicodeChar))^,(RightLen)*sizeof(UnicodeChar))
        else
          Move(Pointer(S2)^,(Pointer(DestS)+LeftLen*sizeof(UnicodeChar))^,(RightLen+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS holds the right part: grow, shift it behind the space for
          the left part, then copy the left part in front }
        SetLength(DestS,RightLen+LeftLen);
        Move(Pointer(DestS)^,(Pointer(DestS)+LeftLen*sizeof(UnicodeChar))^,(RightLen+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,LeftLen*sizeof(UnicodeChar));
      end
    else
      begin
        { DestS is unrelated to both operands: start from scratch }
        DestS:='';
        SetLength(DestS,RightLen+LeftLen);
        Move(Pointer(S1)^,Pointer(DestS)^,LeftLen*sizeof(UnicodeChar));
        Move(Pointer(S2)^,(Pointer(DestS)+LeftLen*sizeof(UnicodeChar))^,(RightLen+1)*sizeof(UnicodeChar));
      end;
  end;
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all strings in sarr in DestS.
    When DestS is the first array element (and appears nowhere else) the
    remaining elements are appended to it in place. When DestS also appears
    later in the array, an extra reference to the original string is held
    until the copy is finished.
  }
  var
    idx : Longint;
    src,dst : pointer;
    PartLen,TotalLen : SizeInt;
    FirstIdx : longint;
    KeptDest : pointer;
    PrefixLen : SizeInt;
  begin
    { NOTE(review): high(sarr)=0 means the array has exactly one element,
      yet DestS is cleared instead of receiving that element - presumably
      callers never pass fewer than two parts; confirm before relying on it }
    if high(sarr)=0 then
      begin
        DestS:='';
        exit;
      end;
    KeptDest:=nil;
    FirstIdx:=low(sarr);
    if Pointer(DestS)=Pointer(sarr[FirstIdx]) then
      inc(FirstIdx);
    { Check for another reuse, then we can't use
      the append optimization }
    for idx:=FirstIdx to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[idx]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. }
            KeptDest:=pointer(DestS);
            fpc_UnicodeStr_Incr_Ref(KeptDest);
            FirstIdx:=low(sarr);
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if FirstIdx=low(sarr) then
      DestS:='';
    PrefixLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    TotalLen:=0;
    for idx:=low(sarr) to high(sarr) do
      inc(TotalLen,length(sarr[idx]));
    SetLength(DestS,TotalLen);
    { Concat all strings, except the string we already
      copied in DestS }
    dst:=Pointer(DestS)+PrefixLen*sizeof(UnicodeChar);
    for idx:=FirstIdx to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        if assigned(src) then
          begin
            PartLen:=length(unicodestring(src));
            { PartLen+1 also copies the terminating zero of the part }
            Move(src^,dst^,(PartLen+1)*sizeof(UnicodeChar));
            { dst is an untyped pointer, so advance in bytes }
            inc(dst,PartLen*sizeof(UnicodeChar));
          end;
      end;
    { release the extra reference taken above (no-op when KeptDest is nil) }
    fpc_UnicodeStr_Decr_Ref(KeptDest);
  end;
  601. {$endif STR_CONCAT_PROCS}
  function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts a Char to a UnicodeChar through the installed
    Ansi2UnicodeMoveProc, using DefaultSystemCodePage.
    Returns the first converted character; when the converter produces an
    empty string, '?' is returned, mirroring fpc_UChar_To_Char.
  }
  var
    w: unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
    { guard against an empty conversion result: indexing w[1] on an empty
      string would read through a nil pointer }
    if length(w)>=1 then
      fpc_Char_To_UChar:=w[1]
    else
      fpc_Char_To_UChar:=UnicodeChar(ord('?'));
  end;
  function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Converts a Char to a UnicodeString of length 1.
  }
  begin
    Setlength(Result,1);
    Result[1]:=c;
    { Terminating Zero }
    PUnicodeChar(Pointer(Result)+sizeof(UnicodeChar))^:=#0;
  end;
  function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char through the installed
    Unicode2AnsiMoveProc, using DefaultSystemCodePage. If the conversion
    does not yield exactly one character, '?' is returned.
  }
  var
    converted: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    if length(converted)<>1 then
      fpc_UChar_To_Char:='?'
    else
      fpc_UChar_To_Char:=converted[1];
  end;
  function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  {
    Converts a WideChar to a UnicodeString of length 1.
  }
  begin
    SetLength(fpc_WChar_To_UnicodeStr,1);
    fpc_WChar_To_UnicodeStr[1]:=c;
  end;
  function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
  {
    Converts a Char to a WideChar through the installed Ansi2WideMoveProc,
    using DefaultSystemCodePage.
    Returns the first converted character; when the converter produces an
    empty string, '?' is returned, mirroring fpc_WChar_To_Char.
  }
  var
    w: widestring;
  begin
    widestringmanager.Ansi2WideMoveProc(@c,DefaultSystemCodePage,w,1);
    { guard against an empty conversion result: indexing w[1] on an empty
      string would read through a nil pointer }
    if length(w)>=1 then
      fpc_Char_To_WChar:=w[1]
    else
      fpc_Char_To_WChar:=WideChar(ord('?'));
  end;
  function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
  {
    Converts a WideChar to a Char through the installed Wide2AnsiMoveProc,
    using DefaultSystemCodePage. If the conversion does not yield exactly
    one character, '?' is returned.
  }
  var
    converted: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    if length(converted)<>1 then
      fpc_WChar_To_Char:='?'
    else
      fpc_WChar_To_Char:=converted[1];
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
  {
    Converts a WideChar to a ShortString through the installed
    Wide2AnsiMoveProc, using DefaultSystemCodePage.
  }
  var
    s: ansistring;
  begin
    { the move procedure takes the target code page as third argument,
      exactly as in the FPC_STRTOSHORTSTRINGPROC variant below }
    widestringmanager.Wide2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
    fpc_WChar_To_ShortStr:= s;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar); compilerproc;
  {
    Converts a WideChar to a ShortString through the installed
    Wide2AnsiMoveProc, using DefaultSystemCodePage. res receives the result.
  }
  var
    s: ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
    res:=s;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Builds a UnicodeString of length 1 whose only character is c.
  }
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts one UnicodeChar to an AnsiString in code page cp.
    Without FPC_HAS_CPSTRING there is no cp parameter and
    DefaultSystemCodePage is used.  The values 0 and $ffff are replaced
    by DefaultSystemCodePage before converting.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    if (cp=0) or (cp=$ffff) then
      cp:=DefaultSystemCodePage;
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  {
    Converts a UnicodeChar to a ShortString (function-result form).
    The conversion goes through an ansistring in DefaultSystemCodePage.
  }
  var
    s: ansistring;
  begin
    { the code page argument was missing here; every other call site of
      Unicode2AnsiMoveProc in this file passes it as the third argument }
    widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
    fpc_UChar_To_ShortStr:= s;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  {
    Converts a UnicodeChar to a ShortString (out-parameter form).
    The conversion goes through an ansistring in DefaultSystemCodePage.
  }
  var
    s: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
    res:=s;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a zero-terminated PChar (in DefaultSystemCodePage) to a
    UnicodeString.  A nil pointer or an empty string yields ''.
  }
  var
    len : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { locate the terminating #0 to get the byte length }
        len:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(p,DefaultSystemCodePage,Result,len);
      end
    else
      Result:='';
  end;
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a char array (in DefaultSystemCodePage) to a UnicodeString.
    With zerobased=true the array is read up to the first #0 (or in full
    when it contains none); otherwise the whole array is converted.
  }
  var
    count, nulpos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            Result:='';
            exit;
          end;
        nulpos:=IndexChar(arr,count,#0);
        if nulpos<>-1 then
          count:=nulpos;
      end;
    SetLength(Result,count);
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,Result,count);
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of unicodechar to a ShortString (function-result form).
    At most 255 source characters are considered; with zerobased=true the
    conversion stops at the first zero element within that range.
  }
  var
    l: longint;
    index: longint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    if l>=256 then
      l:=255
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if (index < 0) then
          len := l
        else
          len := index;
      end
    else
      len := l;
    { the code page argument was missing here; every other call site of
      Unicode2AnsiMoveProc in this file passes it as the third argument }
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
    fpc_UnicodeCharArray_To_ShortStr := temp;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of unicodechar to a ShortString (out-parameter form).
    At most high(res) source characters are considered; with zerobased=true
    the conversion stops at the first zero element within that range.
  }
  var
    l: longint;
    index: ptrint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    if l>=high(res)+1 then
      l:=high(res)
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if index<0 then
          len:=l
        else
          len:=index;
      end
    else
      len:=l;
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
    res:=temp;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING}zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an array of unicodechar to an AnsiString in code page cp.
    Without FPC_HAS_CPSTRING there is no cp parameter and
    DefaultSystemCodePage is used.  With zerobased=true the array is read
    up to the first zero element (or in full when it contains none).
  }
  var
    i  : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    { map the placeholder values 0 and $ffff to the default code page,
      the same rule fpc_UChar_To_AnsiStr applies before converting }
    if (cp=$ffff) or (cp=0) then
      cp:=DefaultSystemCodePage;
    if (zerobased) then
      begin
        i:=IndexWord(arr,high(arr)+1,0);
        if i = -1 then
          i := high(arr)+1;
      end
    else
      i := high(arr)+1;
    SetLength(fpc_UnicodeCharArray_To_AnsiStr,i);
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,cp,i);
  end;
  Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies an array of unicodechar into a UnicodeString.
    With zerobased=true the copy stops before the first zero element
    (or takes the whole array when it contains none).
  }
  var
    count, nulpos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,count,0);
        if nulpos<>-1 then
          count:=nulpos;
      end;
    SetLength(Result,count);
    Move(arr[0], Pointer(Result)^,count*sizeof(UnicodeChar));
  end;
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies an array of widechar into a UnicodeString.
    With zerobased=true the copy stops before the first zero element
    (or takes the whole array when it contains none).
  }
  var
    count, nulpos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,count,0);
        if nulpos<>-1 then
          count:=nulpos;
      end;
    SetLength(Result,count);
    Move(arr[0], Pointer(Result)^,count*sizeof(WideChar));
  end;
  { Judging by their names, the following procedures belong in wstrings.inc;
    however, the compiler generates code that uses these functions on all platforms. }
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of widechar to a ShortString (function-result form).
    At most 255 source characters are considered; with zerobased=true the
    conversion stops at the first zero element within that range.
  }
  var
    l: longint;
    index: longint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    if l>=256 then
      l:=255
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if (index < 0) then
          len := l
        else
          len := index;
      end
    else
      len := l;
    { the code page argument was missing here; every other call site of
      Wide2AnsiMoveProc in this file passes it as the third argument }
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
    fpc_WideCharArray_To_ShortStr := temp;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of widechar to a ShortString (out-parameter form).
    At most high(res) source characters are considered; with zerobased=true
    the conversion stops at the first zero element within that range.
  }
  var
    l: longint;
    index: ptrint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    if l>=high(res)+1 then
      l:=high(res)
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if index<0 then
          len:=l
        else
          len:=index;
      end
    else
      len:=l;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
    res:=temp;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an array of widechar to an AnsiString in code page cp.
    Without FPC_HAS_CPSTRING there is no cp parameter and
    DefaultSystemCodePage is used.  With zerobased=true the array is read
    up to the first zero element (or in full when it contains none).
  }
  var
    i  : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    { map the placeholder values 0 and $ffff to the default code page,
      the same rule fpc_UChar_To_AnsiStr applies before converting }
    if (cp=$ffff) or (cp=0) then
      cp:=DefaultSystemCodePage;
    if (zerobased) then
      begin
        i:=IndexWord(arr,high(arr)+1,0);
        if i = -1 then
          i := high(arr)+1;
      end
    else
      i := high(arr)+1;
    SetLength(fpc_WideCharArray_To_AnsiStr,i);
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,cp,i);
  end;
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies an array of widechar into a WideString.
    With zerobased=true the copy stops before the first zero element
    (or takes the whole array when it contains none).
  }
  var
    count, nulpos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,count,0);
        if nulpos<>-1 then
          count:=nulpos;
      end;
    SetLength(Result,count);
    Move(arr[0], Pointer(Result)^,count*sizeof(WideChar));
  end;
  957. {$ifndef FPC_STRTOCHARARRAYPROC}
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM)                                    }
  function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in the result array.
    The copy is truncated to arraysize bytes; the rest is zero-filled.
  }
  var
    len: SizeInt;
    temp: ansistring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { the code page argument was missing here; every other call site of
        unicode2ansimoveproc in this file passes it as the third argument }
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    move(temp[1],fpc_unicodestr_to_chararray[0],len);
    fillchar(fpc_unicodestr_to_chararray[len],arraysize-len,0);
  {$pop}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { unicodechararray we're converting to (JM)                             }
  function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Copies up to arraysize characters of src into the result array and
    zero-fills the remaining elements.
  }
  var
    ncopy: SizeInt;
  begin
    ncopy := length(src);
    if arraysize < ncopy then
      ncopy := arraysize;
  {$push}
  {$r-}
    { src[1] must not be touched when there is nothing to copy }
    if ncopy > 0 then
      move(src[1],Result[0],ncopy*SizeOf(UnicodeChar));
    fillchar(Result[ncopy],(arraysize-ncopy)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { unicodechararray we're converting to (JM)                             }
  function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to UTF-16 and stores
    it in the result array.  The copy is truncated to arraysize characters;
    the rest is zero-filled.
  }
  var
    len: SizeInt;
    temp: unicodestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { the code page argument was missing here; every other call site of
        ansi2unicodemoveproc in this file passes it as the second argument }
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    move(temp[1],fpc_ansistr_to_unicodechararray[0],len*sizeof(unicodechar));
    fillchar(fpc_ansistr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to UTF-16 and stores
    it in the result array.  The copy is truncated to arraysize characters;
    the rest is zero-filled.
  }
  var
    len: longint;
    temp : unicodestring;
  begin
    len := length(src);
    { make sure we don't access char 1 if length is 0 (JM) }
    if len > 0 then
      { the code page argument was missing here; every other call site of
        ansi2unicodemoveproc in this file passes it as the second argument }
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$push}
  {$r-}
    move(temp[1],fpc_shortstr_to_unicodechararray[0],len*sizeof(unicodechar));
    fillchar(fpc_shortstr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  1033. {$else ndef FPC_STRTOCHARARRAYPROC}
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in res.
    The copy is truncated to length(res) bytes; the rest is zero-filled.
  }
  var
    ncopy, capacity: SizeInt;
    converted: ansistring;
  begin
    capacity := length(res);
    { src[1] is only addressed when src is non-empty }
    if length(src) > 0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,DefaultSystemCodePage,length(src));
    ncopy := length(converted);
    if ncopy > capacity then
      ncopy := capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],ncopy);
    fillchar(res[ncopy],capacity-ncopy,0);
  {$pop}
  end;
  procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  {
    Copies up to length(res) characters of src into res and zero-fills
    the remaining elements.
  }
  var
    ncopy, capacity: SizeInt;
  begin
    capacity := length(res);
    ncopy := length(src);
    if ncopy > capacity then
      ncopy := capacity;
  {$push}
  {$r-}
    { src[1] must not be touched when there is nothing to copy }
    if ncopy > 0 then
      move(src[1],res[0],ncopy*SizeOf(UnicodeChar));
    fillchar(res[ncopy],(capacity-ncopy)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to UTF-16 and
    stores it in res.  The copy is truncated to length(res) characters;
    the rest is zero-filled.
  }
  var
    ncopy, capacity: SizeInt;
    converted: unicodestring;
  begin
    capacity := length(res);
    { src[1] is only addressed when src is non-empty }
    if length(src) > 0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,length(src));
    ncopy := length(converted);
    if ncopy > capacity then
      ncopy := capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],ncopy*sizeof(unicodechar));
    fillchar(res[ncopy],(capacity-ncopy)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to UTF-16 and
    stores it in res.  The copy is truncated to length(res) characters;
    the rest is zero-filled.
  }
  var
    ncopy: longint;
    converted : unicodestring;
  begin
    { src[1] is only addressed when src is non-empty }
    if length(src) > 0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,length(src));
    ncopy := length(converted);
    if ncopy > length(res) then
      ncopy := length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],ncopy*sizeof(unicodechar));
    fillchar(res[ncopy],(length(res)-ncopy)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src, using the code page reported by StringCodePage(src), to
    wide characters and stores them in res.  The copy is truncated to
    length(res) characters; the rest is zero-filled.
  }
  var
    ncopy, capacity: SizeInt;
    converted: widestring;
  begin
    capacity := length(res);
    { src[1] is only addressed when src is non-empty }
    if length(src) > 0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),converted,length(src));
    ncopy := length(converted);
    if ncopy > capacity then
      ncopy := capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],ncopy*sizeof(widechar));
    fillchar(res[ncopy],(capacity-ncopy)*SizeOf(WideChar),0);
  {$pop}
  end;
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src (interpreted in DefaultSystemCodePage) to wide characters
    and stores them in res.  The copy is truncated to length(res)
    characters; the rest is zero-filled.
  }
  var
    ncopy: longint;
    converted : widestring;
  begin
    { src[1] is only addressed when src is non-empty }
    if length(src) > 0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,length(src));
    ncopy := length(converted);
    if ncopy > length(res) then
      ncopy := length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],ncopy*sizeof(widechar));
    fillchar(res[ncopy],(length(res)-ncopy)*SizeOf(WideChar),0);
  {$pop}
  end;
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies up to length(res) characters of src into res and zero-fills
    the remaining elements.
  }
  var
    ncopy, capacity: SizeInt;
  begin
    capacity := length(res);
    ncopy := length(src);
    if ncopy > capacity then
      ncopy := capacity;
  {$push}
  {$r-}
    { src[1] must not be touched when there is nothing to copy }
    if ncopy > 0 then
      move(src[1],res[0],ncopy*SizeOf(WideChar));
    fillchar(res[ncopy],(capacity-ncopy)*SizeOf(WideChar),0);
  {$pop}
  end;
  1154. {$endif ndef FPC_STRTOCHARARRAYPROC}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings:
     <0 if S1<S2
      0 if S1=S2
     >0 if S1>S2
    The common prefix is compared word by word; on a tie the length
    difference decides.
  }
  Var
    Common,Diff : SizeInt;
  begin
    { same string data (or both nil): equal without looking at the contents }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Common:=Length(S1);
    if Length(S2)<Common then
      Common:=Length(S2);
    Diff:=CompareWord(S1[1],S2[1],Common);
    if Diff=0 then
      Diff:=Length(S1)-Length(S2);
    fpc_UnicodeStr_Compare:=Diff;
  end;
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
      0 if S1=S2
     <>0 if S1<>S2
    Strings of different length are reported as -1 without comparing data.
  }
  begin
    if pointer(S1)=pointer(S2) then
      Result:=0
    else if Length(S1)<>Length(S2) then
      Result:=-1
    else
      Result:=CompareWord(S1[1],S2[1],Length(S1));
  end;
  {$ifdef VER2_4}
  // obsolete but needed for bootstrapping with 2.4
  Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  { Raises run-time error 201 when p is nil. }
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  { Raises run-time error 201 unless 1 <= index <= len. }
  begin
    if (Index<1) or (index>len) then
      HandleErrorFrame(201,get_frame);
  end;
  {$else VER2_4}
  Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Raises run-time error 201 when p is nil or index lies outside
    1..len, where len is read from the string header in front of p.
  }
  begin
    { p=nil is tested first so the header is never read through a nil pointer }
    if (p=nil) or (Index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorFrame(201,get_frame);
  end;
  {$endif VER2_4}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l characters.
    l<=0 releases the string and leaves S nil.  Otherwise S ends up with a
    reference count of 1: a nil string is newly allocated, a string with
    Ref=1 is resized in place when needed, and a shared string is copied
    into a new allocation.
  }
  Var
    Fresh       : Pointer;
    CharsToCopy : SizeInt;
    CurBytes,
    WantBytes   : SizeUInt;
  begin
    if l<=0 then
      begin
        { Length=0: drop our reference }
        if Pointer(S)<>nil then
          fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=nil;
        exit;
      end;

    if Pointer(S)=nil then
      { Need a complete new string... }
      Pointer(S):=NewUnicodeString(l)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: step back to the start of the allocation, resize, step forward }
        Dec(Pointer(S),UnicodeFirstOff);
        CurBytes:=MemSize(Pointer(S));
        WantBytes:=SizeUInt(l*sizeof(UnicodeChar)+UnicodeRecLen);
        { grow when too small; shrink only blocks above 32 bytes that
          would be at most half used }
        if (WantBytes>CurBytes) or ((CurBytes>32) and (WantBytes<=(CurBytes div 2))) then
          reallocmem(Pointer(S),WantBytes);
        Inc(Pointer(S),UnicodeFirstOff);
      end
    else
      begin
        { shared string: build a private copy }
        Fresh:=Pointer(NewUnicodeString(l));
        if Length(S)>0 then
          begin
            { old contents plus terminating null, but never more than l characters }
            CharsToCopy:=succ(length(S));
            if l<CharsToCopy then
              CharsToCopy:=l;
            Move(Pointer(S)^,Fresh^,CharsToCopy*Sizeof(UnicodeChar));
          end;
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Fresh;
      end;

    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  end;
  1272. {*****************************************************************************
  1273. Public functions, In interface.
  1274. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  { Returns the zero-terminated string at S as a UnicodeString. }
  begin
    UnicodeCharToString:=UnicodeCharLenToString(S,Length(UnicodeString(S)));
  end;
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  {
    Converts Src (using its own code page, StringCodePage(Src)) to UTF-16
    and stores it, zero-terminated, in the buffer at Dest.
    DestSize is the buffer capacity in characters including the terminator;
    at most DestSize-1 characters are copied.
    Returns Dest.  Nothing is written when Dest is nil or DestSize<=0.
  }
  var
    temp : unicodestring;
    len  : SizeInt;
  begin
    result:=Dest;
    { no room even for the terminator: writing would land outside the buffer }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    len:=Length(temp);
    if len>=DestSize then
      len:=DestSize-1;
    if len>0 then
      move(temp[1],Dest^,len*SizeOf(UnicodeChar));
    { terminate right after the copied text, not at the end of the buffer }
    Dest[len]:=#0;
  end;
  function WideCharToString(S : PWideChar) : UnicodeString;
  { Returns the zero-terminated wide string at S as a UnicodeString. }
  begin
    WideCharToString:=WideCharLenToString(S,Length(WideString(S)));
  end;
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (using its own code page, StringCodePage(Src)) to wide
    characters and stores them, zero-terminated, in the buffer at Dest.
    DestSize is the buffer capacity in characters including the terminator;
    at most DestSize-1 characters are copied.
    Returns Dest.  Nothing is written when Dest is nil or DestSize<=0.
  }
  var
    temp : widestring;
    len  : SizeInt;
  begin
    result:=Dest;
    { no room even for the terminator: writing would land outside the buffer }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    len:=Length(temp);
    if len>=DestSize then
      len:=DestSize-1;
    if len>0 then
      move(temp[1],Dest^,len*SizeOf(WideChar));
    { terminate right after the copied text, not at the end of the buffer }
    Dest[len]:=#0;
  end;
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len characters at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*SizeOf(UnicodeChar));
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters at Src in Dest. }
  begin
    Dest:=UnicodeCharLenToString(Src, Len);
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the first Len characters at Src in Dest, converted to AnsiString
    by the assignment. }
  begin
    Dest:=UnicodeCharLenToString(Src, Len);
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Stores the zero-terminated string at S in Dest, converted to AnsiString
    by the assignment. }
  begin
    Dest:=UnicodeCharToString(S);
  end;
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len wide characters at S. }
  begin
    SetLength(WideCharLenToString,Len);
    Move(S^,Pointer(WideCharLenToString)^,Len*SizeOf(WideChar));
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len wide characters at Src in Dest. }
  begin
    Dest:=WideCharLenToString(Src, Len);
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the first Len wide characters at Src in Dest, converted to
    AnsiString by the assignment. }
  begin
    Dest:=WideCharLenToString(Src, Len);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Stores the zero-terminated wide string at S in Dest. }
  begin
    Dest:=WideCharToString(S);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Stores the zero-terminated wide string at S in Dest, converted to
    AnsiString by the assignment. }
  begin
    Dest:=WideCharToString(S);
  end;
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Makes sure the reference count of S is 1, using copy-on-write
    semantics: a string whose count is not 1 is duplicated and S is
    redirected to the duplicate.  Returns the (possibly new) value of S;
    nil stays nil.
  }
  Var
    Dup : Pointer;
    Len : SizeInt;
  begin
    Result:=S;
    if S=nil then
      exit;
    if PUnicodeRec(S-UnicodeFirstOff)^.Ref=1 then
      exit;
    Len:=PUnicodeRec(S-UnicodeFirstOff)^.len;
    Dup:=NewUnicodeString(Len);
    { Len+1: the terminating null is copied as well }
    Move(PUnicodeChar(S)^,Dup^,(Len+1)*sizeof(UnicodeChar));
    PUnicodeRec(Dup-UnicodeFirstOff)^.len:=Len;
    fpc_unicodestr_decr_ref(S);  { Thread safe }
    S:=Dup;
    Result:=Dup;
  end;
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns the substring of S that starts at the 1-based position Index
    and is at most Size characters long.  An Index below 1 is treated as 1
    and Size is clipped to the end of S; an empty selection yields ''.
  }
  var
    NewData : Pointer;
  begin
    NewData:=nil;
    { switch to a zero-based, non-negative start offset }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Both tests are needed: Size can be maxint, in which case Index+Size
      wraps to a negative value.  This also covers a zero-length S. }
    if (Size>Length(S)) or (Index+Size>Length(S)) then
      Size:=Length(S)-Index;
    if Size>0 then
      begin
        NewData:=Pointer(NewUnicodeString(Size));
        if NewData<>nil then
          begin
            Move(PUnicodeChar(S)[Index],NewData^,Size*sizeof(UnicodeChar));
            PUnicodeRec(NewData-UnicodeFirstOff)^.Len:=Size;
            PUnicodeChar(NewData+Size*sizeof(UnicodeChar))^:=#0;
          end;
      end;
    { release whatever the result held before installing the new data }
    fpc_unicodestr_decr_ref(Pointer(Result));
    Pointer(Result):=NewData;
  end;
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when Substr is empty or does not occur.
  }
  var
    idx, lastStart, subLen : SizeInt;
    cursor : punicodechar;
  begin
    subLen:=Length(Substr);
    if subLen=0 then
      exit(0);
    { last 1-based start position at which Substr still fits }
    lastStart:=Length(Source)-subLen+1;
    cursor:=@Source[1];
    for idx:=1 to lastStart do
      begin
        { cheap first-character test before the full comparison }
        if (Substr[1]=cursor^) and
           (CompareWord(Substr[1],cursor^,subLen)=0) then
          exit(idx);
        inc(cursor);
      end;
    Pos:=0;
  end;
  { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c in s,
    or 0 when c does not occur.
  }
  var
    idx, total : SizeInt;
    cursor : punicodechar;
  begin
    total:=length(s);
    cursor:=@s[1];
    idx:=1;
    while idx<=total do
      begin
        if cursor^=c then
          exit(idx);
        inc(cursor);
        inc(idx);
      end;
    Pos:=0;
  end;
  Function Pos (c : RawByteString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts c to UnicodeString and searches for it in s. }
  begin
    Pos:=Pos(UnicodeString(c), s);
  end;
  Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts c to UnicodeString and searches for it in s. }
  begin
    Pos:=Pos(UnicodeString(c), s);
  end;
  Function Pos (c : UnicodeString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Converts s to UnicodeString and searches for c in it. }
  begin
    Pos:=Pos(c, UnicodeString(s));
  end;
  { Faster version for a char alone. Must be implemented because   }
  { pos(c: char; const s: shortstring) also exists, so otherwise   }
  { using pos(char,pchar) will always call the shortstring version }
  { (exact match for first argument), also with $h+ (JM)           }
  Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c (widened to
    a unicodechar) in s, or 0 when it does not occur.
  }
  var
    idx, total : SizeInt;
    needle : unicodechar;
    cursor : punicodechar;
  begin
    { widen once, outside the loop }
    needle:=c;
    total:=length(s);
    cursor:=@s[1];
    idx:=1;
    while idx<=total do
      begin
        if cursor^=needle then
          exit(idx);
        inc(cursor);
        inc(idx);
      end;
    Pos:=0;
  end;
  Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes up to Size characters from S, starting at the 1-based position
    Index.  Nothing happens when Index lies outside 1..Length(S) or Size<=0;
    a Size that runs past the end is clipped.
  }
  Var
    CurLen : SizeInt;
  begin
    CurLen:=Length(S);
    if (Index<=0) or (Index>CurLen) or (Size<=0) then
      exit;
    UniqueString(S);
    { (Size+Index) will overflow if Size=MaxInt. }
    if Size>CurLen-Index then
      Size:=CurLen-Index+1;
    if Size<=CurLen-Index then
      begin
        { close the gap: move the tail, including the terminating null, down }
        Dec(Index);
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(CurLen-Index-Size+1)*sizeof(UnicodeChar));
      end;
    SetLength(S,CurLen-Size);
  end;
  Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the 1-based position Index.
    Index is clamped to 1..Length(S)+1; an empty Source leaves S untouched.
  }
  var
    Merged : UnicodeString;
    OldLen, AddLen : SizeInt;
  begin
    AddLen:=Length(Source);
    if AddLen=0 then
      exit;
    if Index<=0 then
      Index:=1;
    OldLen:=Length(S);
    if Index>OldLen then
      Index:=OldLen+1;
    { from here on Index is the zero-based insertion offset }
    Dec(Index);
    Pointer(Merged):=NewUnicodeString(AddLen+OldLen);
    SetLength(Merged,AddLen+OldLen);
    { head of S, then Source, then the tail of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],AddLen*sizeof(UnicodeChar));
    if (OldLen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[AddLen+Index],(OldLen-Index)*sizeof(UnicodeChar));
    S:=Merged;
  end;
  Function UpCase(c:UnicodeChar):UnicodeChar;
  {
    Upper-cases a single UnicodeChar by passing a one-character string to
    the widestring manager's UpperUnicodeStringProc and returning the first
    character of its result.
  }
  var
    single : UnicodeString;
  begin
    single:=c;
    UpCase:=widestringmanager.UpperUnicodeStringProc(single)[1];
  end;
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns s upper-cased by the widestring manager's UpperUnicodeStringProc. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  {
    Sets S to a string of Len characters and, when Buf is not nil and
    Len>0, fills it with the first Len characters at Buf.
  }
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>nil) then
      Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  {
    Sets S to the first Len bytes at Buf, converted from
    DefaultSystemCodePage.  When Buf is nil or Len<=0 no conversion is
    done and S is only resized with SetLength(S,Len).
  }
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>nil) then
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  {
    Val() helper for floating point targets with a UnicodeString source.
    Sources longer than 255 characters are rejected with Code=256 and a
    result of 0; otherwise the text is parsed through a temporary String.
  }
  Var
    Narrow : String;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  {$endif}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  {
    Val() helper for enumeration targets with a UnicodeString source.
    Sources longer than 255 characters are rejected with code=256 and a
    result of 0 (the result used to be left undefined on this path, unlike
    the other fpc_Val_*_UnicodeStr helpers in this file).
    NOTE(review): str2ordindex is not used and the text is handed to Val
    with a longint destination - confirm that this is the intended parsing
    for enumeration names.
  }
  var
    ss : shortstring;
  begin
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  {
    Val() helper for Currency targets with a UnicodeString source.
    Sources longer than 255 characters are rejected with Code=256 and a
    result of 0; otherwise the text is parsed through a temporary String.
  }
  Var
    Narrow : String;
  begin
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      begin
        Result:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  {
    Val() helper for unsigned integer targets with a UnicodeString source.
    Sources longer than 255 characters are rejected with Code=256 and a
    result of 0; otherwise the text is parsed through a ShortString.
  }
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  {
    Val() helper for signed integer targets with a UnicodeString source.
    Sources longer than 255 characters are rejected with Code=256 and a
    result of 0; otherwise the text is copied to a ShortString and parsed
    by int_Val_SInt_ShortStr, which also receives DestSize.
  }
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      Code:=256;
  end;
  1605. {$ifndef CPU64}
  1606. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1607. Var
  1608. SS : ShortString;
  1609. begin
  1610. fpc_Val_qword_UnicodeStr:=0;
  1611. if length(S)>255 then
  1612. code:=256
  1613. else
  1614. begin
  1615. SS := S;
  1616. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1617. end;
  1618. end;
  1619. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1620. Var
  1621. SS : ShortString;
  1622. begin
  1623. fpc_Val_int64_UnicodeStr:=0;
  1624. if length(S)>255 then
  1625. code:=256
  1626. else
  1627. begin
  1628. SS := S;
  1629. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1630. end;
  1631. end;
  1632. {$endif CPU64}
  1633. {$ifndef FPUNONE}
  1634. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1635. var
  1636. ss : shortstring;
  1637. begin
  1638. str_real(len,fr,d,treal_type(rt),ss);
  1639. s:=ss;
  1640. end;
  1641. {$endif}
  1642. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1643. var ss:shortstring;
  1644. begin
  1645. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1646. s:=ss;
  1647. end;
  1648. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1649. var ss:shortstring;
  1650. begin
  1651. fpc_shortstr_bool(b,len,ss);
  1652. s:=ss;
  1653. end;
  1654. {$ifdef FPC_HAS_STR_CURRENCY}
  1655. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1656. var
  1657. ss : shortstring;
  1658. begin
  1659. str(c:len:fr,ss);
  1660. s:=ss;
  1661. end;
  1662. {$endif FPC_HAS_STR_CURRENCY}
  1663. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1664. Var
  1665. SS : ShortString;
  1666. begin
  1667. Str (v:Len,SS);
  1668. S:=SS;
  1669. end;
  1670. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1671. Var
  1672. SS : ShortString;
  1673. begin
  1674. str(v:Len,SS);
  1675. S:=SS;
  1676. end;
  1677. {$ifndef CPU64}
  1678. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1679. Var
  1680. SS : ShortString;
  1681. begin
  1682. Str (v:Len,SS);
  1683. S:=SS;
  1684. end;
  1685. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1686. Var
  1687. SS : ShortString;
  1688. begin
  1689. str(v:Len,SS);
  1690. S:=SS;
  1691. end;
  1692. {$endif CPU64}
  1693. { converts an utf-16 code point or surrogate pair to utf-32 }
  1694. function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  1695. var
  1696. w: unicodechar;
  1697. begin
  1698. { UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
  1699. { are the same in UTF-32 }
  1700. w:=s[index];
  1701. if (w<=#$d7ff) or
  1702. (w>=#$e000) then
  1703. begin
  1704. result:=UCS4Char(w);
  1705. len:=1;
  1706. end
  1707. { valid surrogate pair? }
  1708. else if (w<=#$dbff) and
  1709. { w>=#$d7ff check not needed, checked above }
  1710. (index<length(s)) and
  1711. (s[index+1]>=#$dc00) and
  1712. (s[index+1]<=#$dfff) then
  1713. { convert the surrogate pair to UTF-32 }
  1714. begin
  1715. result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
  1716. len:=2;
  1717. end
  1718. else
  1719. { invalid surrogate -> do nothing }
  1720. begin
  1721. result:=UCS4Char(w);
  1722. len:=1;
  1723. end;
  1724. end;
  1725. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1726. begin
  1727. if assigned(Source) then
  1728. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  1729. else
  1730. Result:=0;
  1731. end;
  1732. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1733. var
  1734. i,j : SizeUInt;
  1735. w : word;
  1736. lw : longword;
  1737. len : longint;
  1738. begin
  1739. result:=0;
  1740. if source=nil then
  1741. exit;
  1742. i:=0;
  1743. j:=0;
  1744. if assigned(Dest) then
  1745. begin
  1746. while (i<SourceChars) and (j<MaxDestBytes) do
  1747. begin
  1748. w:=word(Source[i]);
  1749. case w of
  1750. 0..$7f:
  1751. begin
  1752. Dest[j]:=char(w);
  1753. inc(j);
  1754. end;
  1755. $80..$7ff:
  1756. begin
  1757. if j+1>=MaxDestBytes then
  1758. break;
  1759. Dest[j]:=char($c0 or (w shr 6));
  1760. Dest[j+1]:=char($80 or (w and $3f));
  1761. inc(j,2);
  1762. end;
  1763. $800..$d7ff,$e000..$ffff:
  1764. begin
  1765. if j+2>=MaxDestBytes then
  1766. break;
  1767. Dest[j]:=char($e0 or (w shr 12));
  1768. Dest[j+1]:=char($80 or ((w shr 6) and $3f));
  1769. Dest[j+2]:=char($80 or (w and $3f));
  1770. inc(j,3);
  1771. end;
  1772. $d800..$dbff:
  1773. {High Surrogates}
  1774. begin
  1775. if j+3>=MaxDestBytes then
  1776. break;
  1777. if (i<sourcechars-1) and
  1778. (word(Source[i+1]) >= $dc00) and
  1779. (word(Source[i+1]) <= $dfff) then
  1780. begin
  1781. lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
  1782. Dest[j]:=char($f0 or (lw shr 18));
  1783. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1784. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1785. Dest[j+3]:=char($80 or (lw and $3f));
  1786. inc(j,4);
  1787. inc(i);
  1788. end;
  1789. end;
  1790. end;
  1791. inc(i);
  1792. end;
  1793. if j>SizeUInt(MaxDestBytes-1) then
  1794. j:=MaxDestBytes-1;
  1795. Dest[j]:=#0;
  1796. end
  1797. else
  1798. begin
  1799. while i<SourceChars do
  1800. begin
  1801. case word(Source[i]) of
  1802. $0..$7f:
  1803. inc(j);
  1804. $80..$7ff:
  1805. inc(j,2);
  1806. $800..$d7ff,$e000..$ffff:
  1807. inc(j,3);
  1808. $d800..$dbff:
  1809. begin
  1810. if (i<sourcechars-1) and
  1811. (word(Source[i+1]) >= $dc00) and
  1812. (word(Source[i+1]) <= $dfff) then
  1813. begin
  1814. inc(j,4);
  1815. inc(i);
  1816. end;
  1817. end;
  1818. end;
  1819. inc(i);
  1820. end;
  1821. end;
  1822. result:=j+1;
  1823. end;
  1824. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1825. begin
  1826. if assigned(Source) then
  1827. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1828. else
  1829. Result:=0;
  1830. end;
  1831. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1832. const
  1833. UNICODE_INVALID=63;
  1834. var
  1835. InputUTF8: SizeUInt;
  1836. IBYTE: BYTE;
  1837. OutputUnicode: SizeUInt;
  1838. PRECHAR: SizeUInt;
  1839. TempBYTE: BYTE;
  1840. CharLen: SizeUint;
  1841. LookAhead: SizeUInt;
  1842. UC: SizeUInt;
  1843. begin
  1844. if not assigned(Source) then
  1845. begin
  1846. result:=0;
  1847. exit;
  1848. end;
  1849. result:=SizeUInt(-1);
  1850. InputUTF8:=0;
  1851. OutputUnicode:=0;
  1852. PreChar:=0;
  1853. if Assigned(Dest) Then
  1854. begin
  1855. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1856. begin
  1857. IBYTE:=byte(Source[InputUTF8]);
  1858. if (IBYTE and $80) = 0 then
  1859. begin
  1860. //One character US-ASCII, convert it to unicode
  1861. if IBYTE = 10 then
  1862. begin
  1863. If (PreChar<>13) and FALSE then
  1864. begin
  1865. //Expand to crlf, conform UTF-8.
  1866. //This procedure will break the memory alocation by
  1867. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1868. if OutputUnicode+1<MaxDestChars then
  1869. begin
  1870. Dest[OutputUnicode]:=WideChar(13);
  1871. inc(OutputUnicode);
  1872. Dest[OutputUnicode]:=WideChar(10);
  1873. inc(OutputUnicode);
  1874. PreChar:=10;
  1875. end
  1876. else
  1877. begin
  1878. Dest[OutputUnicode]:=WideChar(13);
  1879. inc(OutputUnicode);
  1880. end;
  1881. end
  1882. else
  1883. begin
  1884. Dest[OutputUnicode]:=WideChar(IBYTE);
  1885. inc(OutputUnicode);
  1886. PreChar:=IBYTE;
  1887. end;
  1888. end
  1889. else
  1890. begin
  1891. Dest[OutputUnicode]:=WideChar(IBYTE);
  1892. inc(OutputUnicode);
  1893. PreChar:=IBYTE;
  1894. end;
  1895. inc(InputUTF8);
  1896. end
  1897. else
  1898. begin
  1899. TempByte:=IBYTE;
  1900. CharLen:=0;
  1901. while (TempBYTE and $80)<>0 do
  1902. begin
  1903. TempBYTE:=(TempBYTE shl 1) and $FE;
  1904. inc(CharLen);
  1905. end;
  1906. //Test for the "CharLen" conforms UTF-8 string
  1907. //This means the 10xxxxxx pattern.
  1908. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1909. begin
  1910. //Insuficient chars in string to decode
  1911. //UTF-8 array. Fallback to single char.
  1912. CharLen:= 1;
  1913. end;
  1914. for LookAhead := 1 to CharLen-1 do
  1915. begin
  1916. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1917. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1918. begin
  1919. //Invalid UTF-8 sequence, fallback.
  1920. CharLen:= LookAhead;
  1921. break;
  1922. end;
  1923. end;
  1924. UC:=$FFFF;
  1925. case CharLen of
  1926. 1: begin
  1927. //Not valid UTF-8 sequence
  1928. UC:=UNICODE_INVALID;
  1929. end;
  1930. 2: begin
  1931. //Two bytes UTF, convert it
  1932. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1933. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1934. if UC <= $7F then
  1935. begin
  1936. //Invalid UTF sequence.
  1937. UC:=UNICODE_INVALID;
  1938. end;
  1939. end;
  1940. 3: begin
  1941. //Three bytes, convert it to unicode
  1942. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1943. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1944. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1945. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1946. begin
  1947. //Invalid UTF-8 sequence
  1948. UC:= UNICODE_INVALID;
  1949. End;
  1950. end;
  1951. 4: begin
  1952. //Four bytes, convert it to two unicode characters
  1953. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1954. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1955. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1956. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1957. if (UC < $10000) or (UC > $10FFFF) then
  1958. begin
  1959. UC:= UNICODE_INVALID;
  1960. end
  1961. else
  1962. begin
  1963. { only store pair if room }
  1964. dec(UC,$10000);
  1965. if (OutputUnicode<MaxDestChars-1) then
  1966. begin
  1967. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1968. inc(OutputUnicode);
  1969. UC:=(UC and $3ff) + $DC00;
  1970. end
  1971. else
  1972. begin
  1973. InputUTF8:= InputUTF8 + CharLen;
  1974. { don't store anything }
  1975. CharLen:=0;
  1976. end;
  1977. end;
  1978. end;
  1979. 5,6,7: begin
  1980. //Invalid UTF8 to unicode conversion,
  1981. //mask it as invalid UNICODE too.
  1982. UC:=UNICODE_INVALID;
  1983. end;
  1984. end;
  1985. if CharLen > 0 then
  1986. begin
  1987. PreChar:=UC;
  1988. Dest[OutputUnicode]:=WideChar(UC);
  1989. inc(OutputUnicode);
  1990. end;
  1991. InputUTF8:= InputUTF8 + CharLen;
  1992. end;
  1993. end;
  1994. Result:=OutputUnicode+1;
  1995. end
  1996. else
  1997. begin
  1998. while (InputUTF8<SourceBytes) do
  1999. begin
  2000. IBYTE:=byte(Source[InputUTF8]);
  2001. if (IBYTE and $80) = 0 then
  2002. begin
  2003. //One character US-ASCII, convert it to unicode
  2004. if IBYTE = 10 then
  2005. begin
  2006. if (PreChar<>13) and FALSE then
  2007. begin
  2008. //Expand to crlf, conform UTF-8.
  2009. //This procedure will break the memory alocation by
  2010. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  2011. inc(OutputUnicode,2);
  2012. PreChar:=10;
  2013. end
  2014. else
  2015. begin
  2016. inc(OutputUnicode);
  2017. PreChar:=IBYTE;
  2018. end;
  2019. end
  2020. else
  2021. begin
  2022. inc(OutputUnicode);
  2023. PreChar:=IBYTE;
  2024. end;
  2025. inc(InputUTF8);
  2026. end
  2027. else
  2028. begin
  2029. TempByte:=IBYTE;
  2030. CharLen:=0;
  2031. while (TempBYTE and $80)<>0 do
  2032. begin
  2033. TempBYTE:=(TempBYTE shl 1) and $FE;
  2034. inc(CharLen);
  2035. end;
  2036. //Test for the "CharLen" conforms UTF-8 string
  2037. //This means the 10xxxxxx pattern.
  2038. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  2039. begin
  2040. //Insuficient chars in string to decode
  2041. //UTF-8 array. Fallback to single char.
  2042. CharLen:= 1;
  2043. end;
  2044. for LookAhead := 1 to CharLen-1 do
  2045. begin
  2046. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  2047. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  2048. begin
  2049. //Invalid UTF-8 sequence, fallback.
  2050. CharLen:= LookAhead;
  2051. break;
  2052. end;
  2053. end;
  2054. UC:=$FFFF;
  2055. case CharLen of
  2056. 1: begin
  2057. //Not valid UTF-8 sequence
  2058. UC:=UNICODE_INVALID;
  2059. end;
  2060. 2: begin
  2061. //Two bytes UTF, convert it
  2062. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  2063. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  2064. if UC <= $7F then
  2065. begin
  2066. //Invalid UTF sequence.
  2067. UC:=UNICODE_INVALID;
  2068. end;
  2069. end;
  2070. 3: begin
  2071. //Three bytes, convert it to unicode
  2072. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  2073. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  2074. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  2075. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  2076. begin
  2077. //Invalid UTF-8 sequence
  2078. UC:= UNICODE_INVALID;
  2079. end;
  2080. end;
  2081. 4: begin
  2082. //Four bytes, convert it to two unicode characters
  2083. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  2084. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  2085. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  2086. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  2087. if (UC < $10000) or (UC > $10FFFF) then
  2088. UC:= UNICODE_INVALID
  2089. else
  2090. { extra character character }
  2091. inc(OutputUnicode);
  2092. end;
  2093. 5,6,7: begin
  2094. //Invalid UTF8 to unicode conversion,
  2095. //mask it as invalid UNICODE too.
  2096. UC:=UNICODE_INVALID;
  2097. end;
  2098. end;
  2099. if CharLen > 0 then
  2100. begin
  2101. PreChar:=UC;
  2102. inc(OutputUnicode);
  2103. end;
  2104. InputUTF8:= InputUTF8 + CharLen;
  2105. end;
  2106. end;
  2107. Result:=OutputUnicode+1;
  2108. end;
  2109. end;
  2110. function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  2111. begin
  2112. Result:=UTF8Encode(UnicodeString(s));
  2113. end;
  2114. function UTF8Encode(const s : UnicodeString) : RawByteString;
  2115. var
  2116. i : SizeInt;
  2117. hs : UTF8String;
  2118. begin
  2119. result:='';
  2120. if s='' then
  2121. exit;
  2122. SetLength(hs,length(s)*3);
  2123. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  2124. if i>0 then
  2125. begin
  2126. SetLength(hs,i-1);
  2127. result:=hs;
  2128. end;
  2129. end;
  2130. function UTF8Decode(const s : RawByteString): UnicodeString;
  2131. var
  2132. i : SizeInt;
  2133. hs : UnicodeString;
  2134. begin
  2135. result:='';
  2136. if s='' then
  2137. exit;
  2138. SetLength(hs,length(s));
  2139. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  2140. if i>0 then
  2141. begin
  2142. SetLength(hs,i-1);
  2143. result:=hs;
  2144. end;
  2145. end;
  2146. function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  2147. begin
  2148. Result:=Utf8Encode(s);
  2149. end;
  2150. function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  2151. begin
  2152. Result:=Utf8Decode(s);
  2153. end;
  2154. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  2155. var
  2156. i, slen,
  2157. destindex : SizeInt;
  2158. len : longint;
  2159. begin
  2160. slen:=length(s);
  2161. setlength(result,slen+1);
  2162. i:=1;
  2163. destindex:=0;
  2164. while (i<=slen) do
  2165. begin
  2166. result[destindex]:=utf16toutf32(s,i,len);
  2167. inc(destindex);
  2168. inc(i,len);
  2169. end;
  2170. { destindex <= slen (surrogate pairs may have been merged) }
  2171. { destindex+1 for terminating #0 (dynamic arrays are }
  2172. { implicitely filled with zero) }
  2173. setlength(result,destindex+1);
  2174. end;
  2175. { concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. }
  2176. procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
  2177. var
  2178. p : PUnicodeChar;
  2179. begin
  2180. { if nc > $ffff, we need two places }
  2181. if (index+ord(nc > $ffff)>length(s)) then
  2182. if (length(s) < 10*256) then
  2183. setlength(s,length(s)+10)
  2184. else
  2185. setlength(s,length(s)+length(s) shr 8);
  2186. { we know that s is unique -> avoid uniquestring calls}
  2187. p:=@s[index];
  2188. if (nc<$ffff) then
  2189. begin
  2190. p^:=unicodechar(nc);
  2191. inc(index);
  2192. end
  2193. else if (dword(nc)<=$10ffff) then
  2194. begin
  2195. p^:=unicodechar((nc - $10000) shr 10 + $d800);
  2196. (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
  2197. inc(index,2);
  2198. end
  2199. else
  2200. { invalid code point }
  2201. begin
  2202. p^:='?';
  2203. inc(index);
  2204. end;
  2205. end;
  2206. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  2207. var
  2208. i : SizeInt;
  2209. resindex : SizeInt;
  2210. begin
  2211. { skip terminating #0 }
  2212. SetLength(result,length(s)-1);
  2213. resindex:=1;
  2214. for i:=0 to high(s)-1 do
  2215. ConcatUTF32ToUnicodeStr(s[i],result,resindex);
  2216. { adjust result length (may be too big due to growing }
  2217. { for surrogate pairs) }
  2218. setlength(result,resindex-1);
  2219. end;
  2220. function WideStringToUCS4String(const s : WideString) : UCS4String;
  2221. var
  2222. i, slen,
  2223. destindex : SizeInt;
  2224. len : longint;
  2225. begin
  2226. slen:=length(s);
  2227. setlength(result,slen+1);
  2228. i:=1;
  2229. destindex:=0;
  2230. while (i<=slen) do
  2231. begin
  2232. result[destindex]:=utf16toutf32(s,i,len);
  2233. inc(destindex);
  2234. inc(i,len);
  2235. end;
  2236. { destindex <= slen (surrogate pairs may have been merged) }
  2237. { destindex+1 for terminating #0 (dynamic arrays are }
  2238. { implicitely filled with zero) }
  2239. setlength(result,destindex+1);
  2240. end;
  2241. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  2242. procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
  2243. var
  2244. p : PWideChar;
  2245. begin
  2246. { if nc > $ffff, we need two places }
  2247. if (index+ord(nc > $ffff)>length(s)) then
  2248. if (length(s) < 10*256) then
  2249. setlength(s,length(s)+10)
  2250. else
  2251. setlength(s,length(s)+length(s) shr 8);
  2252. { we know that s is unique -> avoid uniquestring calls}
  2253. p:=@s[index];
  2254. if (nc<$ffff) then
  2255. begin
  2256. p^:=widechar(nc);
  2257. inc(index);
  2258. end
  2259. else if (dword(nc)<=$10ffff) then
  2260. begin
  2261. p^:=widechar((nc - $10000) shr 10 + $d800);
  2262. (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
  2263. inc(index,2);
  2264. end
  2265. else
  2266. { invalid code point }
  2267. begin
  2268. p^:='?';
  2269. inc(index);
  2270. end;
  2271. end;
  2272. function UCS4StringToWideString(const s : UCS4String) : WideString;
  2273. var
  2274. i : SizeInt;
  2275. resindex : SizeInt;
  2276. begin
  2277. { skip terminating #0 }
  2278. SetLength(result,length(s)-1);
  2279. resindex:=1;
  2280. for i:=0 to high(s)-1 do
  2281. ConcatUTF32ToWideStr(s[i],result,resindex);
  2282. { adjust result length (may be too big due to growing }
  2283. { for surrogate pairs) }
  2284. setlength(result,resindex-1);
  2285. end;
  2286. const
  2287. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  2288. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  2289. procedure unimplementedunicodestring;
  2290. begin
  2291. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  2292. If IsConsole then
  2293. begin
  2294. Writeln(StdErr,SNoUnicodestrings);
  2295. Writeln(StdErr,SRecompileWithUnicodestrings);
  2296. end;
  2297. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  2298. HandleErrorFrame(233,get_frame);
  2299. end;
  2300. function StringElementSize(const S: UnicodeString): Word; overload;
  2301. begin
  2302. if assigned(Pointer(S)) then
  2303. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  2304. else
  2305. Result:=SizeOf(UnicodeChar);
  2306. end;
  2307. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  2308. begin
  2309. if assigned(Pointer(S)) then
  2310. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  2311. else
  2312. Result:=0;
  2313. end;
  2314. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  2315. begin
  2316. {$ifdef FPC_HAS_CPSTRING}
  2317. if assigned(Pointer(S)) then
  2318. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  2319. else
  2320. {$endif FPC_HAS_CPSTRING}
  2321. Result:=DefaultUnicodeCodePage;
  2322. end;
  2323. {$warnings off}
  2324. function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
  2325. begin
  2326. unimplementedunicodestring;
  2327. end;
  2328. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  2329. begin
  2330. unimplementedunicodestring;
  2331. end;
  2332. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  2333. begin
  2334. unimplementedunicodestring;
  2335. end;
  2336. {$warnings on}
  2337. procedure initunicodestringmanager;
  2338. begin
  2339. {$ifndef HAS_WIDESTRINGMANAGER}
  2340. widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  2341. widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  2342. widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
  2343. widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  2344. {$endif HAS_WIDESTRINGMANAGER}
  2345. widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
  2346. widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  2347. {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2348. {$ifndef HAS_WIDESTRINGMANAGER}
  2349. widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
  2350. widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  2351. widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
  2352. widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  2353. {$endif HAS_WIDESTRINGMANAGER}
  2354. widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
  2355. widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
  2356. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  2357. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  2358. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2359. end;