ustrings.inc 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UnicodeStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of chars. Multiply with
  20. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  21. @ : String + Terminating #0;
  22. Punicodechar(Unicodestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  27. Type
  28. PUnicodeRec = ^TUnicodeRec;
  29. TUnicodeRec = Packed Record { header stored directly in front of the character data }
  30. CodePage : TSystemCodePage; { NewUnicodeString sets this to DefaultUnicodeCodePage }
  31. ElementSize : Word; { NewUnicodeString sets this to SizeOf(UnicodeChar) }
  32. {$ifdef CPU64}
  33. { align fields }
  34. Dummy : DWord;
  35. {$endif CPU64}
  36. Ref : SizeInt; { reference count; the ref-count helpers treat negative values as constant strings }
  37. Len : SizeInt; { length counted in UnicodeChars, not bytes }
  38. First : UnicodeChar; { first character slot; string pointers point at this field }
  39. end;
  40. Const
  41. UnicodeRecLen = SizeOf(TUnicodeRec); { header size including the First slot }
  42. UnicodeFirstOff = SizeOf(TUnicodeRec)-sizeof(UnicodeChar); { byte offset from record start to First }
  43. {
  44. Default UnicodeChar <-> Char conversion only maps the lower 256
  45. code points one-to-one; all other UnicodeChars are translated to '?'.
  46. These routines can be overwritten for the Current Locale
  47. }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UnicodeChar -> single byte mover.
    Resizes dest to len bytes and fills it from source: code units below 256
    are stored as one byte, anything else is replaced by '?'.
    The cp argument is not consulted by this implementation.
  }
  var
    remaining : SizeInt;
    outp : PAnsiChar;
    code : word;
  begin
    SetLength(dest,len);
    { after SetLength dest is uniquely referenced, so raw pointer writes are safe }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        code:=word(source^);
        if code>=256 then
          outp^:='?'
        else
          outp^:=char(code);
        inc(source);
        inc(outp);
        dec(remaining);
      end;
  end;
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback single byte -> UnicodeChar mover.
    Resizes dest to len characters and widens every source byte to the
    UnicodeChar with the same ordinal value.
    The cp argument is not consulted by this implementation.
  }
  var
    remaining : SizeInt;
    outp : PUnicodeChar;
  begin
    SetLength(dest,len);
    { after SetLength dest is uniquely referenced, so raw pointer writes are safe }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=unicodechar(byte(source^));
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  { Default implementation: reports Length(Str) for the zero terminated string. }
  begin
    Result:=Length(Str);
  end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  {
    Default implementation: returns 0 when Str starts with the terminating #0
    and 1 otherwise. MaxLookAead is not used here.
  }
  begin
    if Str[0]=#0 then
      Result:=0
    else
      Result:=1;
  end;
  Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { Hands back the currently installed manager in Old, then installs New. }
  begin
    GetUnicodeStringManager(Old);
    widestringmanager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  { Installs New as the active string manager; the previous one is discarded. }
  begin
    widestringmanager:=New;
  end;
  Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  { Wide-named twin of GetUnicodeStringManager; both read the same global record. }
  begin
    GetUnicodeStringManager(Manager);
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { Wide-named twin of the two-argument SetUnicodeStringManager: returns the old manager, installs New. }
  begin
    SetUnicodeStringManager(New,Old);
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  { Installs New as the active string manager without reporting the previous one. }
  begin
    widestringmanager:=New;
  end;
  116. {****************************************************************************
  117. Internal functions, not in interface.
  118. ****************************************************************************}
  procedure UnicodeStringError;
  { Reports run-time error 204 through HandleErrorFrame, using the caller's frame. }
  const
    UnicodeStringErrorCode = 204;
  begin
    HandleErrorFrame(UnicodeStringErrorCode,get_frame);
  end;
  123. {$ifdef UnicodeStrDebug}
  Procedure DumpUnicodeRec(S : Pointer);
  {
    Debug helper: prints the length and reference count stored in the header
    that precedes the string data S points to, or a note when S is nil.
  }
  Var
    Hdr : PUnicodeRec;
  begin
    If S=Nil then
      begin
        Writeln ('String is nil');
        exit;
      end;
    Hdr:=PUnicodeRec(S-UnicodeFirstOff);
    Write ('(Len:',Hdr^.Len);
    Writeln (' Ref: ',Hdr^.Ref,')');
  end;
  137. {$endif}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates heap storage for a UnicodeString of Len characters and fills in
    its header: reference count 1, length Len, code page DefaultUnicodeCodePage
    and element size SizeOf(UnicodeChar). The first character slot is set to #0.
    Returns a pointer to the character data; the header starts UnicodeFirstOff
    bytes before it. When GetMem yields nil, UnicodeStringError is called.
  }
  Var
    Block : Pointer;
    Hdr : PUnicodeRec;
  begin
    GetMem(Block,UnicodeRecLen+Len*sizeof(UnicodeChar));
    if Block=Nil then
      UnicodeStringError
    else
      begin
        Hdr:=PUnicodeRec(Block);
        Hdr^.CodePage:=DefaultUnicodeCodePage;
        Hdr^.ElementSize:=SizeOf(UnicodeChar);
        Hdr^.Ref:=1;
        Hdr^.Len:=Len;
        Hdr^.First:=#0;
        { step over the header so the result points at the characters }
        inc(Block,UnicodeFirstOff);
      end;
    NewUnicodeString:=Block;
  end;
  Procedure DisposeUnicodeString(Var S : Pointer);
  {
    Releases the heap block of a UnicodeString and sets S to nil.
    S points at the character data, so it is moved back by UnicodeFirstOff
    to the start of the allocation first. A nil S is left untouched.
  }
  begin
    if S<>Nil then
      begin
        Dec(S,UnicodeFirstOff);
        FreeMem(S);
        S:=Nil;
      end;
  end;
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Drops one reference from the unicodestring S points to.
    Nil strings and strings with a negative count (constants) are ignored.
    When the count reaches zero the string is handed to DisposeUnicodeString.
  }
  Var
    Hdr : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        Hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { a negative count marks a constant string }
        if Hdr^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(Hdr^.Ref) then
            DisposeUnicodeString(S);
      end;
  end;
  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S points to.
    Nil strings and strings with a negative count (constants) are ignored.
  }
  Var
    Hdr : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        Hdr:=PUnicodeRec(S-UnicodeFirstOff);
        if Hdr^.Ref>=0 then
          inclocked(Hdr^.Ref);
      end;
  end;
  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  206. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
  {
    Converts a UnicodeString to a ShortString (function form).
    At most high_of_res characters of S2 are converted. The conversion goes
    through the installed Unicode2AnsiMoveProc using DefaultSystemCodePage,
    the same way the procedure form of this routine does.
  }
  Var
    Size : SizeInt;
    temp : ansistring;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        If Size>high_of_res then
          Size:=high_of_res;
        { the mover takes (source, dest, code page, length); the code page was missing here }
        widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  225. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString (procedure form).
    res is cleared first; at most high(res) characters of S2 are passed to
    the installed Unicode2AnsiMoveProc with DefaultSystemCodePage.
  }
  Var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if CharCount>high(res) then
      CharCount:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),Converted,DefaultSystemCodePage,CharCount);
    res:=Converted;
  end;
  244. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString through the installed
    Ansi2UnicodeMoveProc, using DefaultSystemCodePage.

    The mover decides the length of the result. It can be shorter than the
    number of input bytes when the code page is multi-byte, so no terminator
    is written at the input length here: that offset may lie outside the
    result buffer. The mover builds the result with SetLength, as the default
    mover in this file does.
  }
  Var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  end;
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString through the installed
    Unicode2AnsiMoveProc. A code page of $ffff is replaced by
    DefaultSystemCodePage; without code page aware strings cp is a local
    that always starts as $ffff. An empty S2 yields an empty result.
  }
  Var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=$ffff;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    CharCount:=Length(S2);
    if CharCount<=0 then
      exit;
    if cp=$ffff then
      cp:=DefaultSystemCodePage;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,CharCount);
  end;
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString through the installed
    Ansi2UnicodeMoveProc. The source code page is taken from
    StringCodePage(S2); $ffff is replaced by DefaultSystemCodePage.
    An empty S2 yields an empty result.
  }
  Var
    ByteCount : SizeInt;
    SrcCP : TSystemCodePage;
  begin
    result:='';
    ByteCount:=Length(S2);
    if ByteCount<=0 then
      exit;
    SrcCP:=StringCodePage(S2);
    if SrcCP=$ffff then
      SrcCP:=DefaultSystemCodePage;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),SrcCP,result,ByteCount);
  end;
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies S2 into a WideString of the same length with a plain memory move. }
  Var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies S2 into a UnicodeString of the same length with a plain memory move. }
  Var
    CharCount : SizeInt;
  begin
    CharCount:=Length(S2);
    SetLength(Result,CharCount);
    Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to an AnsiString through
    the installed Unicode2AnsiMoveProc. A nil p or an empty buffer yields
    an empty result.
    A code page of $ffff is replaced by DefaultSystemCodePage before the
    mover is called, matching fpc_UnicodeStr_To_AnsiStr and
    fpc_UChar_To_AnsiStr; without code page aware strings cp always starts
    as $ffff.
  }
  var
    Size : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=$ffff;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p=nil then
      exit;
    { length in characters up to, not including, the terminating #0 }
    Size := IndexWord(p^, -1, 0);
    if Size>0 then
      begin
        if cp=$ffff then
          cp:=DefaultSystemCodePage;
        widestringmanager.Unicode2AnsiMoveProc(P,result,cp,Size);
      end;
  end;
  Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from a zero terminated UnicodeChar buffer.
    The length is found with IndexWord; the characters are copied and a
    terminating #0 is stored behind them. A nil p yields an empty string.
  }
  var
    CharCount : SizeInt;
    Dst : PUnicodeChar;
  begin
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, -1, 0);
        SetLength(result,CharCount);
        if CharCount>0 then
          begin
            Dst:=PUnicodeChar(Pointer(result));
            Move(p^,Dst^,CharCount*sizeof(UnicodeChar));
            { Terminating Zero }
            inc(Dst,CharCount);
            Dst^:=#0;
          end;
      end;
  end;
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from a zero terminated WideChar buffer.
    The length is found with IndexWord; the characters are copied and a
    terminating #0 is stored behind them. A nil p yields an empty string.
  }
  var
    CharCount : SizeInt;
    Dst : PUnicodeChar;
  begin
    result:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, -1, 0);
        SetLength(result,CharCount);
        if CharCount>0 then
          begin
            Dst:=PUnicodeChar(Pointer(result));
            Move(p^,Dst^,CharCount*sizeof(UnicodeChar));
            { Terminating Zero }
            inc(Dst,CharCount);
            Dst^:=#0;
          end;
      end;
  end;
  360. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to a ShortString
    (function form). A nil p or an empty buffer yields an empty result.
    The conversion goes through the installed Unicode2AnsiMoveProc using
    DefaultSystemCodePage, the same way the procedure form does.
  }
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    result:='';
    if p=nil then
      exit;
    Size := IndexWord(p^, $7fffffff, 0);
    if Size>0 then
      begin
        { the mover takes (source, dest, code page, length); the code page was missing here }
        widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  376. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  {
    Converts a zero terminated UnicodeChar buffer to a ShortString
    (procedure form). res is cleared first; a nil p or an empty buffer
    leaves it empty. The conversion uses the installed Unicode2AnsiMoveProc
    with DefaultSystemCodePage.
  }
  var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, high(PtrInt), 0);
        if CharCount>0 then
          begin
            widestringmanager.Unicode2AnsiMoveProc(p,Converted,DefaultSystemCodePage,CharCount);
            res:=Converted;
          end;
      end;
  end;
  392. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a zero terminated WideChar buffer to an AnsiString through the
    installed Wide2AnsiMoveProc. A nil p or an empty buffer yields an empty
    result.
    A code page of $ffff is replaced by DefaultSystemCodePage before the
    mover is called, matching fpc_UnicodeStr_To_AnsiStr and
    fpc_UChar_To_AnsiStr; without code page aware strings cp always starts
    as $ffff.
  }
  var
    Size : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=$ffff;
  {$endif FPC_HAS_CPSTRING}
    result:='';
    if p=nil then
      exit;
    { length in characters up to, not including, the terminating #0 }
    Size := IndexWord(p^, -1, 0);
    if Size>0 then
      begin
        if cp=$ffff then
          cp:=DefaultSystemCodePage;
        widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
      end;
  end;
  410. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PWideChar_To_ShortStr(const p : pwidechar): shortstring; compilerproc;
  {
    Converts a zero terminated WideChar buffer to a ShortString
    (function form). A nil p or an empty buffer yields an empty result.
    The conversion goes through the installed Wide2AnsiMoveProc using
    DefaultSystemCodePage, the same way the procedure form does.
  }
  var
    Size : SizeInt;
    temp: ansistring;
  begin
    result:='';
    if p=nil then
      exit;
    Size := IndexWord(p^, $7fffffff, 0);
    if Size>0 then
      begin
        { the mover takes (source, dest, code page, length); the code page was missing here }
        widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
        result:=temp;
      end;
  end;
  426. {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts a zero terminated WideChar buffer to a ShortString
    (procedure form). res is cleared first; a nil p or an empty buffer
    leaves it empty. The conversion uses the installed Wide2AnsiMoveProc
    with DefaultSystemCodePage.
  }
  var
    CharCount : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        CharCount:=IndexWord(p^, high(PtrInt), 0);
        if CharCount>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,Converted,DefaultSystemCodePage,CharCount);
            res:=Converted;
          end;
      end;
  end;
  442. {$endif FPC_STRTOSHORTSTRINGPROC}
  443. { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Implements S1:=S2 for reference counted unicodestrings.
    The reference on S2 is taken before the old S1 is released; S2 is only
    counted when it is non-nil and its count is positive.
  }
  Var
    Src : PUnicodeRec;
  begin
    if S2<>nil then
      begin
        Src:=PUnicodeRec(S2-UnicodeFirstOff);
        if Src^.Ref>0 then
          inclocked(Src^.Ref);
      end;
    { release whatever S1 referred to until now }
    fpc_unicodestr_decr_ref (S1);
    S1:=S2;
  end;
  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  458. {$ifndef STR_CONCAT_PROCS}
  function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  {
    Returns S1 followed by S2 (function form).
    When either operand is empty the other one is assigned directly;
    otherwise a string of the combined length is allocated and both parts
    are copied, the second one together with its terminating #0.
  }
  Var
    Len1,Len2 : SizeInt;
    Dst : punicodechar;
  begin
    if (S1='') then
      result:=S2
    else if (S2='') then
      result:=S1
    else
      begin
        Len1:=Length(S1);
        Len2:=Length(S2);
        SetLength(result,Len1+Len2);
        Dst:=punicodechar(result);
        Move(S1[1],Dst^,Len1*sizeof(UnicodeChar));
        inc(Dst,Len1);
        { Len2+1 copies the terminator of S2 as well }
        Move(S2[1],Dst^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  {
    Returns the concatenation of all strings in sarr (function form).
    The total length is summed first so that the result is sized with a
    single SetLength; every non-empty element is then copied together
    with its terminating #0, which the next element overwrites.
  }
  Var
    idx : Longint;
    src : pointer;
    dst : punicodechar;
    PartLen,Total : SizeInt;
  begin
    Total:=0;
    for idx:=low(sarr) to high(sarr) do
      Total:=Total+length(sarr[idx]);
    SetLength(result,Total);
    dst:=punicodechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        { empty strings are nil pointers and contribute nothing }
        if not assigned(src) then
          continue;
        PartLen:=length(unicodestring(src));
        Move(punicodechar(src)^,dst^,(PartLen+1)*sizeof(UnicodeChar));
        inc(dst,PartLen);
      end;
  end;
  508. {$else STR_CONCAT_PROCS}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1 followed by S2 in DestS (procedure form).
    When either operand is empty the other one is assigned directly.
    DestS may be the same string as S1 and/or S2; those cases grow DestS
    in place and move the parts in an order that does not overwrite data
    that still has to be read.
  }
  Var
    Len1,Len2 : SizeInt;
    Tail : Pointer;
    SelfAppend : boolean;
  begin
    if (S1='') then
      begin
        DestS:=S2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=S1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=Length(S2);
    { Pointer() typecasts compare string identity without conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS:=DestS+S2; remember beforehand whether S2 is DestS as well }
        SelfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        if SelfAppend then
          Move(Pointer(DestS)^,Tail^,Len2*sizeof(UnicodeChar))
        else
          Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS:=S1+DestS; shift the old contents up first, then put S1 in front }
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        Move(Pointer(DestS)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
      end
    else
      begin
        { DestS is unrelated to both operands: start from an empty string }
        DestS:='';
        SetLength(DestS,Len1+Len2);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all strings in sarr in DestS (procedure form).

    An empty array gives an empty DestS. The guard tests high(sarr)<0: the
    previous test, high(sarr)=0, matched a one-element array (discarding its
    only string) and let an empty array fall through to read sarr[0].

    When DestS is also the first element, the remaining elements are
    appended to it in place. When DestS appears anywhere else in sarr, an
    extra reference keeps the original string alive while DestS is rebuilt
    from scratch.
  }
  Var
    i : Longint;
    p,pc : pointer;
    Size,NewLen : SizeInt;
    lowstart : longint;
    destcopy : pointer;
    OldDestLen : SizeInt;
  begin
    if high(sarr)<0 then
      begin
        DestS:='';
        exit;
      end;
    destcopy:=nil;
    lowstart:=low(sarr);
    { DestS:=DestS+...: the first element is already in place }
    if Pointer(DestS)=Pointer(sarr[lowstart]) then
      inc(lowstart);
    { Check for another reuse, then we can't use
      the append optimization }
    for i:=lowstart to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[i]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. Therefore
              this optimization is disabled for WINLIKEUNICODESTRING }
            destcopy:=pointer(dests);
            fpc_UnicodeStr_Incr_Ref(destcopy);
            lowstart:=low(sarr);
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if lowstart=low(sarr) then
      DestS:='';
    OldDestLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestS,NewLen);
    { Concat all strings, except the string we already
      copied in DestS }
    pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
    for i:=lowstart to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(unicodestring(p));
            { Size+1 also copies the terminator; the next part overwrites it }
            Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
            inc(pc,size*sizeof(UnicodeChar));
          end;
      end;
    { drop the extra reference taken above; a nil destcopy is ignored }
    fpc_UnicodeStr_Decr_Ref(destcopy);
  end;
  612. {$endif STR_CONCAT_PROCS}
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts a Char to a UnicodeChar by running it through the installed
    Ansi2UnicodeMoveProc with DefaultSystemCodePage and taking the first
    character of the result.
  }
  var
    Converted : unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,Converted,1);
    Result:=Converted[1];
  end;
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Builds a one character UnicodeString from a Char and stores a
    terminating #0 behind that character.
  }
  begin
    SetLength(Result,1);
    Result[1]:=c;
    { Terminating Zero }
    PUnicodeChar(Pointer(Result)+sizeof(UnicodeChar))^:=#0;
  end;
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char through the installed
    Unicode2AnsiMoveProc with DefaultSystemCodePage. If the conversion does
    not produce exactly one character, '?' is returned.
  }
  var
    Converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    if length(Converted)<>1 then
      Result:='?'
    else
      Result:=Converted[1];
  end;
  Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  { Builds a one character UnicodeString holding c. }
  begin
    SetLength(fpc_WChar_To_UnicodeStr,1);
    fpc_WChar_To_UnicodeStr[1]:=c;
  end;
  Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
  {
    Converts a Char to a WideChar by running it through the installed
    Ansi2WideMoveProc with DefaultSystemCodePage and taking the first
    character of the result.
  }
  var
    Converted : widestring;
  begin
    widestringmanager.Ansi2WideMoveProc(@c,DefaultSystemCodePage,Converted,1);
    Result:=Converted[1];
  end;
  Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
  {
    Converts a WideChar to a Char through the installed Wide2AnsiMoveProc
    with DefaultSystemCodePage. If the conversion does not produce exactly
    one character, '?' is returned.
  }
  var
    Converted : ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    if length(Converted)<>1 then
      Result:='?'
    else
      Result:=Converted[1];
  end;
  671. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
  {
    Converts a WideChar to a ShortString (function form) through the
    installed Wide2AnsiMoveProc using DefaultSystemCodePage, the same way
    the procedure form does.
  }
  var
    s: ansistring;
  begin
    { the mover takes (source, dest, code page, length); the code page was missing here }
    widestringmanager.Wide2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
    fpc_WChar_To_ShortStr:= s;
  end;
  682. {$else FPC_STRTOSHORTSTRINGPROC}
  683. procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  684. {
  685. Converts a WideChar to a ShortString;
  686. }
  687. var
  688. s: ansistring;
  689. begin
  690. widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  691. res:=s;
  692. end;
  693. {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Returns a UnicodeString of length 1 whose only character is c.
  }
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeChar to an AnsiString.
    A code page of $ffff (always the case without FPC_HAS_CPSTRING) selects
    DefaultSystemCodePage.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifdef FPC_HAS_CPSTRING}
    if cp=$ffff then
      cp:=DefaultSystemCodePage;
  {$else FPC_HAS_CPSTRING}
    { no code page parameter available: always use the default }
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  {
    Converts a UnicodeChar to a ShortString (function form, used when
    FPC_STRTOSHORTSTRINGPROC is not defined).
    The character is converted to DefaultSystemCodePage through an
    intermediate ansistring.
  }
  var
    s: ansistring;
  begin
    { pass the code page explicitly, exactly like the procedure form below;
      the call previously omitted this argument }
    widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
    fpc_UChar_To_ShortStr:= s;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  {
    Converts a UnicodeChar to a ShortString (procedure form).
    The character is converted to DefaultSystemCodePage through an
    intermediate ansistring which is then assigned to res.
  }
  var
    s: ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
    res:=s;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a zero-terminated PChar to a UnicodeString using
    DefaultSystemCodePage. A nil pointer or a string that starts with #0
    gives the empty string.
  }
  Var
    Len : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { number of chars before the terminating #0 }
        Len:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(p,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,Len);
      end
    else
      fpc_PChar_To_UnicodeStr:='';
  end;
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts an array of char to a UnicodeString using DefaultSystemCodePage.
    When zerobased is true the conversion stops at the first #0 in arr (an
    array starting with #0 gives the empty string); otherwise the whole
    array is converted.
  }
  var
    count,
    terminator : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            fpc_CharArray_To_UnicodeStr:='';
            exit;
          end;
        terminator:=IndexChar(arr,count,#0);
        { no terminator found: keep the full array length }
        if terminator<>-1 then
          count:=terminator;
      end;
    SetLength(fpc_CharArray_To_UnicodeStr,count);
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,count);
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of unicodechar to a ShortString (function form, used
    when FPC_STRTOSHORTSTRINGPROC is not defined).
    At most 255 characters of arr are considered. When zerobased is true the
    conversion stops at the first zero element within that range.
  }
  var
    l: longint;
    index: longint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    { clamp the number of source characters to what a shortstring can hold }
    if l>=256 then
      l:=255
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if (index < 0) then
          len := l
        else
          len := index;
      end
    else
      len := l;
    { pass the code page explicitly, exactly like the procedure form below;
      the call previously omitted this argument }
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
    fpc_UnicodeCharArray_To_ShortStr := temp;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of unicodechar to a ShortString (procedure form).
    At most high(res) characters of arr are considered. When zerobased is
    true the conversion stops at the first zero element within that range.
    The characters are converted to DefaultSystemCodePage.
  }
  var
    l: longint;
    index: ptrint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    { clamp the number of source characters to the capacity of res }
    if l>=high(res)+1 then
      l:=high(res)
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if index<0 then
          len:=l
        else
          len:=index;
      end
    else
      len:=l;
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
    res:=temp;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING}zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an array of unicodechar to an AnsiString in code page cp.
    When zerobased is true the conversion stops at the first zero element;
    otherwise the whole array is converted.
    A code page of $ffff (always the case without FPC_HAS_CPSTRING) selects
    DefaultSystemCodePage.
  }
  var
    i : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=$ffff;
  {$endif FPC_HAS_CPSTRING}
    { map the $ffff placeholder to the real default code page, the same way
      fpc_UChar_To_AnsiStr does, instead of handing it to the converter }
    if cp=$ffff then
      cp:=DefaultSystemCodePage;
    if (zerobased) then
      begin
        i:=IndexWord(arr,high(arr)+1,0);
        if i = -1 then
          i := high(arr)+1;
      end
    else
      i := high(arr)+1;
    SetLength(fpc_UnicodeCharArray_To_AnsiStr,i);
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,cp,i);
  end;
  Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies an array of unicodechar into a UnicodeString.
    When zerobased is true the copy stops at the first zero element;
    otherwise the whole array is copied.
  }
  var
    count,
    terminator : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        terminator:=IndexWord(arr,count,0);
        { -1 means no zero element: keep the full array length }
        if terminator<>-1 then
          count:=terminator;
      end;
    SetLength(Result,count);
    Move(arr[0],Pointer(Result)^,count*sizeof(UnicodeChar));
  end;
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies an array of widechar into a UnicodeString.
    When zerobased is true the copy stops at the first zero element;
    otherwise the whole array is copied.
  }
  var
    count,
    terminator : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        terminator:=IndexWord(arr,count,0);
        { -1 means no zero element: keep the full array length }
        if terminator<>-1 then
          count:=terminator;
      end;
    SetLength(Result,count);
    Move(arr[0],Pointer(Result)^,count*sizeof(WideChar));
  end;
  { due to their names, the following procedures should be in wstrings.inc,
    however, the compiler generates code using these functions on all platforms }
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of widechar to a ShortString (function form, used
    when FPC_STRTOSHORTSTRINGPROC is not defined).
    At most 255 characters of arr are considered. When zerobased is true the
    conversion stops at the first zero element within that range.
  }
  var
    l: longint;
    index: longint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    { clamp the number of source characters to what a shortstring can hold }
    if l>=256 then
      l:=255
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if (index < 0) then
          len := l
        else
          len := index;
      end
    else
      len := l;
    { pass the code page explicitly, exactly like the procedure form below;
      the call previously omitted this argument }
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
    fpc_WideCharArray_To_ShortStr := temp;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of widechar to a ShortString (procedure form).
    At most high(res) characters of arr are considered. When zerobased is
    true the conversion stops at the first zero element within that range.
    The characters are converted to DefaultSystemCodePage.
  }
  var
    l: longint;
    index: ptrint;
    len: byte;
    temp: ansistring;
  begin
    l := high(arr)+1;
    { clamp the number of source characters to the capacity of res }
    if l>=high(res)+1 then
      l:=high(res)
    else if l<0 then
      l:=0;
    if zerobased then
      begin
        index:=IndexWord(arr[0],l,0);
        if index<0 then
          len:=l
        else
          len:=index;
      end
    else
      len:=l;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
    res:=temp;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an array of widechar to an AnsiString in code page cp.
    When zerobased is true the conversion stops at the first zero element;
    otherwise the whole array is converted.
    A code page of $ffff (always the case without FPC_HAS_CPSTRING) selects
    DefaultSystemCodePage.
  }
  var
    i : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=$ffff;
  {$endif FPC_HAS_CPSTRING}
    { map the $ffff placeholder to the real default code page, the same way
      fpc_UChar_To_AnsiStr does, instead of handing it to the converter }
    if cp=$ffff then
      cp:=DefaultSystemCodePage;
    if (zerobased) then
      begin
        i:=IndexWord(arr,high(arr)+1,0);
        if i = -1 then
          i := high(arr)+1;
      end
    else
      i := high(arr)+1;
    SetLength(fpc_WideCharArray_To_AnsiStr,i);
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,cp,i);
  end;
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies an array of widechar into a WideString.
    When zerobased is true the copy stops at the first zero element;
    otherwise the whole array is copied.
  }
  var
    count,
    terminator : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        terminator:=IndexWord(arr,count,0);
        { -1 means no zero element: keep the full array length }
        if terminator<>-1 then
          count:=terminator;
      end;
    SetLength(Result,count);
    Move(arr[0],Pointer(Result)^,count*sizeof(WideChar));
  end;
  968. {$ifndef FPC_STRTOCHARARRAYPROC}
  { inside the compiler, the resulttype is modified to that of the actual }
  { chararray we're converting to (JM) }
  function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in the resulting
    char array: at most arraysize chars are copied and the remainder of the
    array is filled with #0.
  }
  var
    len: SizeInt;
    temp: ansistring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { pass the code page explicitly like every other caller of this
        converter; the call previously omitted this argument }
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_unicodestr_to_chararray[0],len);
    fillchar(fpc_unicodestr_to_chararray[len],arraysize-len,0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { unicodechararray we're converting to (JM) }
  function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Copies at most arraysize characters of src into the resulting array and
    fills the remaining elements with zero.
  }
  var
    count: SizeInt;
  begin
    count:=length(src);
    if arraysize<count then
      count:=arraysize;
  {$r-}
    { src[1] may only be touched when there is something to copy }
    if count>0 then
      move(src[1],fpc_unicodestr_to_unicodechararray[0],count*SizeOf(UnicodeChar));
    fillchar(fpc_unicodestr_to_unicodechararray[count],(arraysize-count)*SizeOf(UnicodeChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  { inside the compiler, the resulttype is modified to that of the actual }
  { unicodechararray we're converting to (JM) }
  function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Converts src from DefaultSystemCodePage to unicode and stores it in the
    resulting array: at most arraysize characters are copied and the
    remaining elements are filled with zero.
  }
  var
    len: SizeInt;
    temp: unicodestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      { pass the code page explicitly like the procedure form of this
        helper; the call previously omitted this argument }
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_ansistr_to_unicodechararray[0],len*sizeof(unicodechar));
    fillchar(fpc_ansistr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
  {
    Converts src from DefaultSystemCodePage to unicode and stores it in the
    resulting array: at most arraysize characters are copied and the
    remaining elements are filled with zero.
  }
  var
    len: longint;
    temp : unicodestring;
  begin
    len := length(src);
    { make sure we don't access char 1 if length is 0 (JM) }
    if len > 0 then
      { pass the code page explicitly like the procedure form of this
        helper; the call previously omitted this argument }
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
    len := length(temp);
    if len > arraysize then
      len := arraysize;
  {$r-}
    move(temp[1],fpc_shortstr_to_unicodechararray[0],len*sizeof(unicodechar));
    fillchar(fpc_shortstr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  1048. {$else ndef FPC_STRTOCHARARRAYPROC}
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in res: at most
    length(res) chars are copied and the rest of res is filled with #0.
  }
  var
    count,
    capacity: SizeInt;
    converted: ansistring;
  begin
    capacity:=length(res);
    { only touch src[1] when src is not empty }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,DefaultSystemCodePage,length(src));
    count:=length(converted);
    if capacity<count then
      count:=capacity;
  {$r-}
    move(converted[1],res[0],count);
    fillchar(res[count],capacity-count,0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  {
    Copies at most length(res) characters of src into res and fills the
    remaining elements of res with zero.
  }
  var
    count,
    capacity: SizeInt;
  begin
    capacity:=length(res);
    count:=length(src);
    if capacity<count then
      count:=capacity;
  {$r-}
    { src[1] may only be touched when there is something to copy }
    if count>0 then
      move(src[1],res[0],count*SizeOf(UnicodeChar));
    fillchar(res[count],(capacity-count)*SizeOf(UnicodeChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to unicode and stores it in res:
    at most length(res) characters are copied and the remaining elements of
    res are filled with zero.
  }
  var
    count,
    capacity: SizeInt;
    converted: unicodestring;
  begin
    capacity:=length(res);
    { only touch src[1] when src is not empty }
    if length(src)>0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,length(src));
    count:=length(converted);
    if capacity<count then
      count:=capacity;
  {$r-}
    move(converted[1],res[0],count*sizeof(unicodechar));
    fillchar(res[count],(capacity-count)*SizeOf(UnicodeChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to unicode and stores it in res:
    at most length(res) characters are copied and the remaining elements of
    res are filled with zero.
  }
  var
    count: longint;
    converted : unicodestring;
  begin
    count:=length(src);
    { only touch src[1] when src is not empty }
    if count>0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,count);
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$r-}
    move(converted[1],res[0],count*sizeof(unicodechar));
    fillchar(res[count],(length(res)-count)*SizeOf(UnicodeChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: AnsiString); compilerproc;
  {
    Converts src, using the code page reported by StringCodePage(src), to a
    widestring and stores it in res: at most length(res) characters are
    copied and the remaining elements of res are filled with zero.
  }
  var
    count,
    capacity: SizeInt;
    converted: widestring;
  begin
    capacity:=length(res);
    { only touch src[1] when src is not empty }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),converted,length(src));
    count:=length(converted);
    if capacity<count then
      count:=capacity;
  {$r-}
    move(converted[1],res[0],count*sizeof(widechar));
    fillchar(res[count],(capacity-count)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to a widestring and stores it in
    res: at most length(res) characters are copied and the remaining
    elements of res are filled with zero.
  }
  var
    count: longint;
    converted : widestring;
  begin
    count:=length(src);
    { only touch src[1] when src is not empty }
    if count>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,converted,count);
    count:=length(converted);
    if length(res)<count then
      count:=length(res);
  {$r-}
    move(converted[1],res[0],count*sizeof(widechar));
    fillchar(res[count],(length(res)-count)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies at most length(res) characters of src into res and fills the
    remaining elements of res with zero.
  }
  var
    count,
    capacity: SizeInt;
  begin
    capacity:=length(res);
    count:=length(src);
    if capacity<count then
      count:=capacity;
  {$r-}
    { src[1] may only be touched when there is something to copy }
    if count>0 then
      move(src[1],res[0],count*SizeOf(WideChar));
    fillchar(res[count],(capacity-count)*SizeOf(WideChar),0);
  {$ifdef RangeCheckWasOn}
  {$r+}
  {$endif}
  end;
  1176. {$endif ndef FPC_STRTOCHARARRAYPROC}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings:
     <0 if S1<S2
      0 if S1=S2
     >0 if S1>S2
    The common prefix is compared with CompareWord; when it is equal the
    difference of the lengths decides.
  }
  Var
    Common,
    Diff : SizeInt;
  begin
    { both strings share the same data (or are both empty) }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Common:=Length(S1);
    if Length(S2)<Common then
      Common:=Length(S2);
    Diff:=CompareWord(S1[1],S2[1],Common);
    if Diff=0 then
      Diff:=Length(S1)-Length(S2);
    fpc_UnicodeStr_Compare:=Diff;
  end;
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings:
      0   if S1=S2
      <>0 if S1<>S2
    Strings of different length give -1 without comparing any data.
  }
  Var
    Len : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      begin
        fpc_UnicodeStr_Compare_Equal:=0;
        exit;
      end;
    Len:=Length(S1);
    if Len=Length(S2) then
      fpc_UnicodeStr_Compare_Equal:=CompareWord(S1[1],S2[1],Len)
    else
      fpc_UnicodeStr_Compare_Equal:=-1;
  end;
  1220. {$ifdef VER2_4}
  // obsolete, kept only so the RTL can be bootstrapped with a 2.4 compiler
  Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  {
    Raises run-time error 201 when p is nil.
  }
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Raises run-time error 201 when index is outside 1..len div 2.
    NOTE(review): the halving suggests the 2.4 compiler passes len in
    bytes - confirm against the compiler side.
  }
  begin
    if (Index<1) or (index>len div 2) then
      HandleErrorFrame(201,get_frame);
  end;
  1232. {$else VER2_4}
  Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Range check for indexing a UnicodeString: raises run-time error 201 when
    the string is nil or index is outside 1..length.
    The len field of the string header holds the length in characters
    (fpc_UnicodeStr_SetLength stores the character count there), so it is
    compared with index directly; it must not be halved.
  }
  begin
    if (p=nil) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) or (Index<1) then
      HandleErrorFrame(201,get_frame);
  end;
  1238. {$endif VER2_4}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l characters.
    For l<=0 the string is released and becomes nil. Otherwise S ends up
    with a reference count of one and room for l characters plus the
    terminating zero, which is always rewritten.
  }
  Var
    Fresh : Pointer;
    CopyChars,
    Needed : SizeInt;
  begin
    if l<=0 then
      begin
        { Length=0: drop our reference and leave S empty }
        if Pointer(S)<>nil then
          fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Nil;
        exit;
      end;

    if Pointer(S)=nil then
      { nothing there yet, a completely new string is needed }
      Pointer(S):=NewUnicodeString(l)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: grow the existing block in place when it is too small }
        Dec(Pointer(S),UnicodeFirstOff);
        Needed:=l*sizeof(UnicodeChar)+UnicodeRecLen;
        if SizeUInt(Needed)>MemSize(Pointer(S)) then
          reallocmem(pointer(S),Needed);
        Inc(Pointer(S),UnicodeFirstOff);
      end
    else
      begin
        { shared (or constant) string: copy into a freshly allocated one }
        Fresh:=Pointer(NewUnicodeString(l));
        if Length(S)>0 then
          begin
            { copy the old contents including the terminating null, but
              never more than l characters }
            CopyChars:=succ(length(S));
            if l<CopyChars then
              CopyChars:=l;
            Move(Pointer(S)^,Fresh^,CopyChars*Sizeof(UnicodeChar));
          end;
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Fresh;
      end;
    { force nil termination in case the string got shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  end;
  1291. {*****************************************************************************
  1292. Public functions, In interface.
  1293. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  { Converts the zero-terminated unicode buffer S to a UnicodeString. }
  var
    CharCount : SizeInt;
  begin
    { the length is measured by converting S to a UnicodeString first }
    CharCount:=Length(UnicodeString(S));
    UnicodeCharToString:=UnicodeCharLenToString(S,CharCount);
  end;
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  {
    Converts Src (interpreted in its own code page, StringCodePage(Src)) to
    unicode and copies it into the buffer Dest, which has room for DestSize
    characters including the terminating zero.
    At most DestSize-1 characters are copied and the terminating #0 is
    written directly after the last copied character.
    Returns Dest. Nothing is written when Dest is nil or DestSize<=0.
  }
  var
    temp : unicodestring;
    count : SizeInt;
  begin
    result:=Dest;
    { no room for even the terminator: do not touch memory before Dest }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    count:=Length(temp);
    if count>=DestSize then
      count:=DestSize-1;
    if count>0 then
      move(temp[1],Dest^,count*SizeOf(UnicodeChar));
    { terminate right behind the data; terminating only the last buffer
      slot would leave uninitialised characters after a short string }
    Dest[count]:=#0;
  end;
  function WideCharToString(S : PWideChar) : UnicodeString;
  { Converts the zero-terminated wide char buffer S to a UnicodeString. }
  var
    CharCount : SizeInt;
  begin
    { the length is measured by converting S to a WideString first }
    CharCount:=Length(WideString(S));
    WideCharToString:=WideCharLenToString(S,CharCount);
  end;
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (interpreted in its own code page, StringCodePage(Src)) to
    a wide string and copies it into the buffer Dest, which has room for
    DestSize characters including the terminating zero.
    At most DestSize-1 characters are copied and the terminating #0 is
    written directly after the last copied character.
    Returns Dest. Nothing is written when Dest is nil or DestSize<=0.
  }
  var
    temp : widestring;
    count : SizeInt;
  begin
    result:=Dest;
    { no room for even the terminator: do not touch memory before Dest }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    count:=Length(temp);
    if count>=DestSize then
      count:=DestSize-1;
    if count>0 then
      move(temp[1],Dest^,count*SizeOf(WideChar));
    { terminate right behind the data; terminating only the last buffer
      slot would leave uninitialised characters after a short string }
    Dest[count]:=#0;
  end;
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len characters found at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*SizeOf(UnicodeChar));
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters found at Src in Dest. }
  var
    Converted : UnicodeString;
  begin
    Converted:=UnicodeCharLenToString(Src,Len);
    Dest:=Converted;
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the first Len characters found at Src in Dest; the assignment
    converts the unicode text to an AnsiString. }
  var
    Converted : UnicodeString;
  begin
    Converted:=UnicodeCharLenToString(Src,Len);
    Dest:=Converted;
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Stores the zero-terminated unicode buffer S in Dest; the assignment
    converts the unicode text to an AnsiString. }
  var
    Converted : UnicodeString;
  begin
    Converted:=UnicodeCharToString(S);
    Dest:=Converted;
  end;
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString made of the first Len characters found at S. }
  begin
    SetLength(WideCharLenToString,Len);
    Move(S^,Pointer(WideCharLenToString)^,Len*SizeOf(WideChar));
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters found at Src in Dest. }
  var
    Converted : UnicodeString;
  begin
    Converted:=WideCharLenToString(Src,Len);
    Dest:=Converted;
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the first Len characters found at Src in Dest; the assignment
    converts the unicode text to an AnsiString. }
  var
    Converted : UnicodeString;
  begin
    Converted:=WideCharLenToString(Src,Len);
    Dest:=Converted;
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Stores the zero-terminated wide char buffer S in Dest. }
  var
    Converted : UnicodeString;
  begin
    Converted:=WideCharToString(S);
    Dest:=Converted;
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Stores the zero-terminated wide char buffer S in Dest; the assignment
    converts the unicode text to an AnsiString. }
  var
    Converted : UnicodeString;
  begin
    Converted:=WideCharToString(S);
    Dest:=Converted;
  end;
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Makes sure the string data S points to has a reference count of 1.
    When it does not, the data (including the terminating zero) is copied
    into a new string, the old reference is released and S is redirected to
    the copy. Returns the resulting value of S; a nil S is returned as is.
  }
  Var
    Fresh : Pointer;
    Len : SizeInt;
  begin
    pointer(result):=pointer(S);
    { nothing to do for an empty string or one we already own exclusively }
    if (Pointer(S)=Nil) or (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1) then
      exit;
    Len:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
    Fresh:=NewUnicodeString(Len);
    { Len+1: take the terminating zero along }
    Move(PUnicodeChar(S)^,Fresh^,(Len+1)*sizeof(UnicodeChar));
    PUnicodeRec(Fresh-UnicodeFirstOff)^.len:=Len;
    fpc_unicodestr_decr_ref(Pointer(S));
    pointer(S):=Fresh;
    pointer(result):=Fresh;
  end;
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns the substring of S that starts at the 1-based position Index and
    is at most Size characters long. An Index below 1 is treated as 1; the
    result is empty when nothing remains to be copied.
  }
  var
    NewData : Pointer;
    SrcLen : SizeInt;
  begin
    NewData:=Nil;
    SrcLen:=Length(S);
    { switch to a 0-based offset, clamped at the start of the string }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Limit Size to what is available. Both tests are needed: Size can be
      maxint, in which case Index+Size wraps around to a negative value }
    if (Size>SrcLen) or
       (Index+Size>SrcLen) then
      Size:=SrcLen-Index;
    if Size>0 then
      begin
        NewData:=Pointer(NewUnicodeString(Size));
        if NewData<>Nil then
          begin
            Move(PUnicodeChar(S)[Index],NewData^,Size*sizeof(UnicodeChar));
            PUnicodeRec(NewData-UnicodeFirstOff)^.Len:=Size;
            PUnicodeChar(NewData+Size*sizeof(UnicodeChar))^:=#0;
          end;
      end;
    { release whatever the result variable held before installing the copy }
    fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
    Pointer(fpc_unicodestr_Copy):=NewData;
  end;
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when Substr is empty or does not occur.
  }
  var
    Offset,
    LastStart,
    NeedleLen : SizeInt;
    Cur : punicodechar;
  begin
    Pos:=0;
    NeedleLen:=Length(SubStr);
    if NeedleLen=0 then
      exit;
    { last 0-based offset at which Substr still fits into Source }
    LastStart:=Length(Source)-NeedleLen;
    Cur:=@Source[1];
    for Offset:=0 to LastStart do
      begin
        { cheap first-character test before comparing the whole needle }
        if (SubStr[1]=Cur^) and
           (CompareWord(SubStr[1],Cur^,NeedleLen)=0) then
          begin
            Pos:=Offset+1;
            exit;
          end;
        inc(Cur);
      end;
  end;
  { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c in s, or 0
    when c does not occur.
  }
  var
    Remaining : SizeInt;
    Cur : punicodechar;
  begin
    Pos:=0;
    Cur:=@s[1];
    Remaining:=length(s);
    while Remaining>0 do
      begin
        if Cur^=c then
          begin
            { position = characters already consumed + 1 }
            Pos:=length(s)-Remaining+1;
            exit;
          end;
        inc(Cur);
        dec(Remaining);
      end;
  end;
  Function Pos (c : RawByteString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Searches s for c after converting c to a UnicodeString. }
  begin
    Pos:=Pos(UnicodeString(c),s);
  end;
  Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Searches s for c after converting c to a UnicodeString. }
  begin
    Pos:=Pos(UnicodeString(c),s);
  end;
  Function Pos (c : UnicodeString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Searches s for c after converting s to a UnicodeString. }
  begin
    Pos:=Pos(c,UnicodeString(s));
  end;
  { Faster version for a char alone. It has to exist because            }
  { pos(c: char; const s: shortstring) exists as well: without it,      }
  { pos(char,pchar) would always pick the shortstring version (exact    }
  { match for the first argument), also with $h+ (JM)                   }
  Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  {
    Returns the 1-based position of the first occurrence of c (converted to
    a unicodechar) in s, or 0 when it does not occur.
  }
  var
    Remaining : SizeInt;
    Wanted : unicodechar;
    Cur : punicodechar;
  begin
    Pos:=0;
    { convert once, outside the loop }
    Wanted:=c;
    Cur:=@s[1];
    Remaining:=length(s);
    while Remaining>0 do
      begin
        if Cur^=Wanted then
          begin
            { position = characters already consumed + 1 }
            Pos:=length(s)-Remaining+1;
            exit;
          end;
        inc(Cur);
        dec(Remaining);
      end;
  end;
  Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes Size characters from S starting at the 1-based position Index.
    Nothing happens when Index is outside 1..Length(S) or Size<=0; a Size
    reaching past the end of S removes everything from Index onwards.
  }
  Var
    Total : SizeInt;
  begin
    Total:=Length(S);
    if (Size<=0) or (Index<=0) or (Index>Total) then
      exit;
    UniqueString(S);
    { compare against Total-Index: (Size+Index) would overflow for Size=MaxInt }
    if Size>Total-Index then
      { the deletion reaches the end of the string: nothing has to be moved }
      Size:=Total-Index+1
    else
      begin
        { close the gap, moving the tail together with the terminating zero }
        Dec(Index);
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(Total-Index-Size+1)*sizeof(UnicodeChar));
      end;
    SetLength(S,Total-Size);
  end;
  Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the character at the 1-based position
    Index. An Index below 1 inserts at the start, an Index beyond the end
    appends. An empty Source leaves S untouched.
  }
  var
    Merged : UnicodeString;
    OldLen,
    AddLen : SizeInt;
  begin
    AddLen:=Length(Source);
    if AddLen=0 then
      exit;
    OldLen:=Length(S);
    { clamp Index into 1..OldLen+1, then switch to a 0-based offset }
    if Index<=0 then
      Index:=1
    else if Index>OldLen then
      Index:=OldLen+1;
    Dec(Index);
    Pointer(Merged):=NewUnicodeString(AddLen+OldLen);
    SetLength(Merged,AddLen+OldLen);
    { head of S, then Source, then the remainder of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],AddLen*sizeof(UnicodeChar));
    if OldLen>Index then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[AddLen+Index],(OldLen-Index)*sizeof(UnicodeChar));
    S:=Merged;
  end;
  Function UpCase(c:UnicodeChar):UnicodeChar;
  { Returns the first character of the result of passing c, as a
    one-character string, to the widestring manager's
    UpperUnicodeStringProc. }
  var
    Single : UnicodeString;
  begin
    Single:=c;
    UpCase:=widestringmanager.UpperUnicodeStringProc(Single)[1];
  end;
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns the result of the widestring manager's UpperUnicodeStringProc
    for s. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  {
    Sets S to a string of Len characters and, when Buf is not nil and Len is
    positive, fills it with the first Len characters found at Buf.
  }
  begin
    SetLength(S,Len);
    if (Len<=0) or (Buf=Nil) then
      exit;
    Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  {
    Sets S to a string of Len characters and, when Buf is not nil and Len is
    positive, replaces it with the conversion (DefaultSystemCodePage) of the
    chars found at Buf. A zero byte found at an offset between 1 and Len-1
    shortens the converted range to that offset.
  }
  var
    ZeroAt : SizeInt;
  begin
    SetLength(S,Len);
    if (Len<=0) or (Buf=Nil) then
      exit;
    { look for a zero byte within the first Len+1 bytes of Buf }
    ZeroAt:=IndexByte(Buf^,Len+1,0);
    if (ZeroAt>0) and (ZeroAt<Len) then
      Len:=ZeroAt;
    widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  {
    Val for real values with a UnicodeString source: S is copied to a short
    string and handed to Val. Input longer than 255 characters is rejected
    with Code=256 and a result of 0.
  }
  Var
    Narrow : String;
  begin
    fpc_Val_Real_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_Real_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  {$endif}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  {
    Val for enumeration values with a UnicodeString source: s is copied to a
    shortstring and handed to val. Input longer than 255 characters is
    rejected with code=256 and a result of 0.
    NOTE(review): str2ordindex is not used here, so val parses ss as a plain
    longint rather than looking up an enumeration name - confirm whether the
    shortstring enum helper should be called instead.
  }
  var ss:shortstring;
  begin
    { give the result a defined value on the error path as well, like the
      other fpc_Val_*_UnicodeStr helpers do }
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  {
    Val for currency values with a UnicodeString source: S is copied to a
    short string and handed to Val. Input longer than 255 characters is
    rejected with Code=256 and a result of 0.
  }
  Var
    Narrow : String;
  begin
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,fpc_Val_Currency_UnicodeStr,code);
      end
    else
      begin
        fpc_Val_Currency_UnicodeStr:=0;
        code:=256;
      end;
  end;
  { Compiler helper for Val() with a UnicodeString source and an unsigned
    integer destination. Returns 0 with Code = 256 when S exceeds 255 code
    units; otherwise S is narrowed to a short string and parsed. }
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result := 0;
    if length(S) > 255 then
      begin
        Code := 256;
        exit;
      end;
    Narrow := S;
    Val(Narrow,Result,Code);
  end;
  { Compiler helper for Val() with a UnicodeString source and a signed integer
    destination.

    DestSize is passed through unchanged to int_Val_SInt_ShortStr together
    with the narrowed string. Strings longer than 255 code units yield 0 with
    Code = 256. }
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)>255 then
      begin
        Code:=256;
        exit;
      end;
    Narrow:=S;
    Result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
  end;
  1630. {$ifndef CPU64}
  { Compiler helper for Val() with a UnicodeString source and a QWord
    destination (only compiled when CPU64 is not defined). Over-length input
    (more than 255 code units) yields 0 with Code = 256. }
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  { Compiler helper for Val() with a UnicodeString source and an Int64
    destination (only compiled when CPU64 is not defined). Over-length input
    (more than 255 code units) yields 0 with Code = 256. }
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  Var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=S;
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1657. {$endif CPU64}
  1658. {$ifndef FPUNONE}
  { Compiler helper for Str() of a floating point value into a UnicodeString.
    The value is formatted into a short string by str_real (rt is cast to
    treal_type) and the text is then assigned to s. }
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  var
    formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),formatted);
    s:=formatted;
  end;
  1666. {$endif}
  { Compiler helper for Str() of an enumeration value into a UnicodeString.
    Delegates to fpc_shortstr_enum with the same arguments and widens the
    resulting short string. }
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  var
    formatted : shortstring;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,formatted);
    s:=formatted;
  end;
  { Compiler helper for Str() of a boolean into a UnicodeString.
    Delegates to fpc_shortstr_bool and widens the resulting short string. }
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  var
    formatted : shortstring;
  begin
    fpc_shortstr_bool(b,len,formatted);
    s:=formatted;
  end;
  1679. {$ifdef FPC_HAS_STR_CURRENCY}
  { Compiler helper for Str() of a Currency value into a UnicodeString.
    Formats c with width len and fr decimals into a short string, then
    assigns that text to s. }
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  var
    formatted : shortstring;
  begin
    str(c:len:fr,formatted);
    s:=formatted;
  end;
  1687. {$endif FPC_HAS_STR_CURRENCY}
  { Compiler helper for Str() of a signed integer into a UnicodeString.
    v is formatted with minimum width Len into a short string, which is then
    assigned to S. }
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  { Compiler helper for Str() of an unsigned integer into a UnicodeString.
    v is formatted with minimum width Len into a short string, which is then
    assigned to S. }
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  1702. {$ifndef CPU64}
  { Compiler helper for Str() of an Int64 into a UnicodeString (only compiled
    when CPU64 is not defined). v is formatted with minimum width Len into a
    short string, which is then assigned to S. }
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  { Compiler helper for Str() of a QWord into a UnicodeString (only compiled
    when CPU64 is not defined). v is formatted with minimum width Len into a
    short string, which is then assigned to S. }
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  Var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=Formatted;
  end;
  1717. {$endif CPU64}
  { Converts the UTF-16 code unit at S[index], or the surrogate pair starting
    there, to a UTF-32 value.

    S     : source string.
    index : 1-based position of the code unit to convert; the caller must
            pass a valid index.
    len   : receives the number of code units consumed (1 or 2).

    A high surrogate (#$D800..#$DBFF) that is immediately followed by a low
    surrogate (#$DC00..#$DFFF) is combined into one value and len is 2.
    Every other code unit - including an unpaired surrogate - is returned
    unchanged with len = 1. }
  function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  var
    lead,
    trail: unicodechar;
  begin
    lead:=s[index];
    { default outcome: the code unit maps to itself }
    result:=UCS4Char(lead);
    len:=1;
    { only a high surrogate that still has a successor can start a pair }
    if (lead>=#$d800) and (lead<=#$dbff) and (index<length(s)) then
      begin
        trail:=s[index+1];
        if (trail>=#$dc00) and (trail<=#$dfff) then
          begin
            result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
            len:=2;
          end;
      end;
  end;
  { Converts the #0-terminated UTF-16 string Source to UTF-8.

    The source length is determined by searching for the first #0 code unit
    and the work is delegated to the four-argument overload with MaxBytes as
    the destination capacity. Returns 0 when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source<>nil then
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
  end;
  { Converts SourceChars UTF-16 code units from Source to UTF-8.

    Dest         : destination buffer, or nil to only compute the required
                   size.
    MaxDestBytes : capacity of Dest in bytes, including room for the
                   terminating #0.
    Source       : UTF-16 input; when nil the function returns 0.
    SourceChars  : number of code units to read from Source.

    Returns the number of bytes produced plus one for the terminating #0.
    When Dest is assigned, output stops as soon as the next character would
    not leave room inside MaxDestBytes, and a #0 terminator is stored (over
    the last byte if the buffer is completely full).

    Surrogate handling: a high surrogate followed by a low surrogate is
    encoded as one 4-byte sequence. An unpaired high surrogate and any low
    surrogate that does not follow a high one produce no output.

    Changes compared to the previous implementation:
      - with Dest<>nil and MaxDestBytes=0 no terminator is written any more
        (MaxDestBytes-1 wrapped around, so Dest[0] was written outside the
        buffer); the return value for that case is unchanged;
      - surrogate pairs are combined arithmetically instead of building a
        temporary UnicodeString for every pair. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  var
    i,j : SizeUInt;
    w : word;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            w:=word(Source[i]);
            case w of
              0..$7f:
                begin
                  Dest[j]:=char(w);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  { both bytes must fit }
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (w shr 6));
                  Dest[j+1]:=char($80 or (w and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  { all three bytes must fit }
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (w shr 12));
                  Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                  Dest[j+2]:=char($80 or (w and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  { all four bytes must fit }
                  if j+3>=MaxDestBytes then
                    break;
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { combine the pair into a code point >= $10000 }
                      lw:=(longword(w)-$d800) shl 10 + (longword(word(Source[i+1]))-$dc00) + $10000;
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { skip the low surrogate as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { store the terminator only if the buffer has room for it at all }
        if MaxDestBytes>0 then
          begin
            if j>MaxDestBytes-1 then
              j:=MaxDestBytes-1;
            Dest[j]:=#0;
          end;
      end
    else
      begin
        { no destination: only count the bytes that would be produced }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    result:=j+1;
  end;
  { Converts the #0-terminated UTF-8 string Source to UTF-16.

    The source length is taken from strlen and the work is delegated to the
    four-argument overload with MaxChars as the destination capacity.
    Returns 0 when Source is nil. }
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=0;
    if Source<>nil then
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
  end;
  { Decodes the multi-byte UTF-8 sequence whose lead byte is Source[Pos].
    Helper of UTF8ToUnicode; must only be called for bytes with bit 7 set.

    Source      : UTF-8 input buffer.
    Pos         : index of the lead byte.
    SourceBytes : number of valid bytes in Source.
    SeqLen      : receives the number of input bytes to consume (always >= 1).

    Returns the decoded code point. Values >= $10000 denote a valid
    supplementary code point that needs a surrogate pair; 63 ('?') is
    returned for anything that is not a valid sequence:
      - a stray continuation byte or a lead byte announcing more than four
        bytes,
      - a sequence that would extend past SourceBytes (one byte is consumed),
      - a sequence interrupted by a non-continuation byte (the bytes before
        the interruption are consumed),
      - overlong encodings, encoded surrogates, $FFFE/$FFFF and values above
        $10FFFF. }
  function DecodeUtf8Sequence(Source: PChar; Pos, SourceBytes: SizeUInt; out SeqLen: SizeUInt): SizeUInt;
  const
    UNICODE_INVALID=63;
  var
    Probe: Byte;
    LookAhead: SizeUInt;
    Interrupted: Boolean;
    UC: SizeUInt;
  begin
    { the number of leading 1 bits of the lead byte is the announced length }
    Probe:=Byte(Source[Pos]);
    SeqLen:=0;
    while (Probe and $80)<>0 do
      begin
        Probe:=(Probe shl 1) and $FE;
        inc(SeqLen);
      end;
    { the last byte of the sequence has index Pos+SeqLen-1 and must lie
      inside the input; otherwise fall back to a single (invalid) byte }
    if Pos+SeqLen-1>=SourceBytes then
      SeqLen:=1;
    { every following byte must match the 10xxxxxx pattern }
    Interrupted:=false;
    for LookAhead:=1 to SeqLen-1 do
      if (Byte(Source[Pos+LookAhead]) and $C0)<>$80 then
        begin
          SeqLen:=LookAhead;
          Interrupted:=true;
          break;
        end;
    UC:=UNICODE_INVALID;
    if not Interrupted then
      case SeqLen of
        2:
          begin
            UC:=(Byte(Source[Pos]) and $1F) shl 6;
            UC:=UC or (Byte(Source[Pos+1]) and $3F);
            { overlong encoding of an ASCII character }
            if UC<=$7F then
              UC:=UNICODE_INVALID;
          end;
        3:
          begin
            UC:=(Byte(Source[Pos]) and $0F) shl 12;
            UC:=UC or ((Byte(Source[Pos+1]) and $3F) shl 6);
            UC:=UC or (Byte(Source[Pos+2]) and $3F);
            if (UC<=$7FF) or (UC>=$FFFE) or ((UC>=$D800) and (UC<=$DFFF)) then
              UC:=UNICODE_INVALID;
          end;
        4:
          begin
            UC:=(Byte(Source[Pos]) and $07) shl 18;
            UC:=UC or ((Byte(Source[Pos+1]) and $3F) shl 12);
            UC:=UC or ((Byte(Source[Pos+2]) and $3F) shl 6);
            UC:=UC or (Byte(Source[Pos+3]) and $3F);
            if (UC<$10000) or (UC>$10FFFF) then
              UC:=UNICODE_INVALID;
          end;
        { 1 and 5..8: not a valid UTF-8 sequence, UC stays UNICODE_INVALID }
      end;
    Result:=UC;
  end;

  { Converts SourceBytes bytes of UTF-8 from Source to UTF-16.

    Dest         : destination buffer, or nil to only compute the required
                   size.
    MaxDestChars : capacity of Dest in UTF-16 code units.
    Source       : UTF-8 input; when nil the function returns 0.
    SourceBytes  : number of bytes to read from Source.

    Returns the number of code units produced plus one. No terminator is
    stored by this routine. Invalid input is replaced by '?' (see
    DecodeUtf8Sequence). Code points >= $10000 are stored as a surrogate
    pair; when only one free unit is left in Dest the pair is skipped and
    conversion continues with the following input.
    Line feeds are copied unchanged (no CR/LF expansion is performed).

    Changes compared to the previous implementation:
      - a sequence ending exactly one byte past SourceBytes is no longer
        decoded (the bounds test was off by one and read past the input);
      - a sequence interrupted by a non-continuation byte yields '?' instead
        of being decoded as a shorter form with the wrong lead-byte mask;
      - lead byte $FF yields '?' instead of U+FFFF;
      - the never-executed CR/LF expansion code was removed and the decoding
        shared by the storing and the counting pass was factored out. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  var
    InputUTF8: SizeUInt;
    OutputUnicode: SizeUInt;
    CharLen: SizeUInt;
    UC: SizeUInt;
    IBYTE: Byte;
  begin
    if not assigned(Source) then
      begin
        result:=0;
        exit;
      end;
    InputUTF8:=0;
    OutputUnicode:=0;
    if Assigned(Dest) then
      begin
        while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
          begin
            IBYTE:=Byte(Source[InputUTF8]);
            if (IBYTE and $80)=0 then
              begin
                { one byte US-ASCII maps directly }
                Dest[OutputUnicode]:=WideChar(IBYTE);
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                UC:=DecodeUtf8Sequence(Source,InputUTF8,SourceBytes,CharLen);
                inc(InputUTF8,CharLen);
                if UC>=$10000 then
                  begin
                    { only store the pair if there is room for both halves }
                    if OutputUnicode+1<MaxDestChars then
                      begin
                        dec(UC,$10000);
                        Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                        Dest[OutputUnicode+1]:=WideChar((UC and $3ff) + $DC00);
                        inc(OutputUnicode,2);
                      end;
                  end
                else
                  begin
                    Dest[OutputUnicode]:=WideChar(UC);
                    inc(OutputUnicode);
                  end;
              end;
          end;
      end
    else
      begin
        { no destination: only count the code units that would be produced }
        while InputUTF8<SourceBytes do
          begin
            if (Byte(Source[InputUTF8]) and $80)=0 then
              begin
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                UC:=DecodeUtf8Sequence(Source,InputUTF8,SourceBytes,CharLen);
                inc(InputUTF8,CharLen);
                if UC>=$10000 then
                  inc(OutputUnicode,2)
                else
                  inc(OutputUnicode);
              end;
          end;
      end;
    Result:=OutputUnicode+1;
  end;
  { Encodes a single-byte string as UTF-8 by first converting it to a
    UnicodeString and then calling the UnicodeString overload. }
  function UTF8Encode(const s : RawByteString) : UTF8String; inline;
  begin
    UTF8Encode:=UTF8Encode(UnicodeString(s));
  end;
  { Encodes a UnicodeString as UTF-8.

    A work buffer of three bytes per UTF-16 code unit is allocated,
    filled by UnicodeToUtf8 and then trimmed to the number of bytes actually
    produced (the count returned by UnicodeToUtf8 includes the terminator).
    Returns an empty string for empty input or when UnicodeToUtf8 reports
    nothing converted. }
  function UTF8Encode(const s : UnicodeString) : UTF8String;
  var
    converted : SizeInt;
    buf : UTF8String;
  begin
    result:='';
    if length(s)=0 then
      exit;
    SetLength(buf,length(s)*3);
    converted:=UnicodeToUtf8(pchar(buf),length(buf)+1,PUnicodeChar(s),length(s));
    if converted<=0 then
      exit;
    SetLength(buf,converted-1);
    result:=buf;
  end;
  { Decodes a UTF-8 string into a UnicodeString.

    A work buffer of one UTF-16 code unit per input byte is allocated,
    filled by Utf8ToUnicode and then trimmed to the number of code units
    actually produced (the count returned by Utf8ToUnicode is one more than
    that). Returns an empty string for empty input or when Utf8ToUnicode
    reports nothing converted. }
  function UTF8Decode(const s : UTF8String): UnicodeString;
  var
    converted : SizeInt;
    buf : UnicodeString;
  begin
    result:='';
    if length(s)=0 then
      exit;
    SetLength(buf,length(s));
    converted:=Utf8ToUnicode(PUnicodeChar(buf),length(buf)+1,pchar(s),length(s));
    if converted<=0 then
      exit;
    SetLength(buf,converted-1);
    result:=buf;
  end;
  { Converts a single-byte string to UTF-8; thin wrapper around Utf8Encode. }
  function AnsiToUtf8(const s : RawByteString): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    AnsiToUtf8:=Utf8Encode(s);
  end;
  { Converts a UTF-8 string to a single-byte string: the input is decoded
    with Utf8Decode and the resulting UnicodeString is assigned to the
    RawByteString result. }
  function Utf8ToAnsi(const s : UTF8String) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Utf8ToAnsi:=Utf8Decode(s);
  end;
  { Converts a UnicodeString to a zero-terminated UCS4String.

    Each code unit or surrogate pair is converted with utf16toutf32. The
    result is first sized for the worst case (one element per code unit plus
    the terminator) and trimmed afterwards, because merged surrogate pairs
    need fewer elements. The element after the last converted one is never
    written and therefore still holds the zero it was created with; it
    serves as terminator. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  var
    srcpos,
    srclen,
    count : SizeInt;
    units : longint;
  begin
    srclen:=length(s);
    setlength(result,srclen+1);
    count:=0;
    srcpos:=1;
    while srcpos<=srclen do
      begin
        result[count]:=utf16toutf32(s,srcpos,units);
        inc(srcpos,units);
        inc(count);
      end;
    { keep exactly the converted elements plus the terminating zero }
    setlength(result,count+1);
  end;
  { Stores the UTF-32 character nc into the UnicodeString S at position index
    and advances index past what was written.
    S *must* be unique when entering.

    nc    : code point to store.
    S     : destination; it is enlarged when index (plus one more unit for
            code points above $FFFF) lies beyond its current length. The
            caller is expected to trim S to index-1 when done.
    index : 1-based write position; incremented by 1 or 2.

    Code points up to and including $FFFF are stored as one code unit,
    $10000..$10FFFF as a surrogate pair, anything larger as '?'.

    Fix: the single-unit test used to be nc<$ffff, which sent U+FFFF into the
    surrogate-pair branch. That wrote two garbage code units although the
    capacity check above reserves only one unit for it. }
  procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
  var
    p : PUnicodeChar;
  begin
    { if nc > $ffff, we need two places }
    if (index+ord(nc > $ffff)>length(s)) then
      if (length(s) < 10*256) then
        setlength(s,length(s)+10)
      else
        setlength(s,length(s)+length(s) shr 8);
    { we know that s is unique -> avoid uniquestring calls}
    p:=@s[index];
    if (nc<=$ffff) then
      begin
        p^:=unicodechar(nc);
        inc(index);
      end
    else if (dword(nc)<=$10ffff) then
      begin
        p^:=unicodechar((nc - $10000) shr 10 + $d800);
        (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
        inc(index,2);
      end
    else
      { invalid code point }
      begin
        p^:='?';
        inc(index);
      end;
  end;
  { Converts a zero-terminated UCS4String to a UnicodeString.

    The last element of s (the terminator) is skipped. The result starts
    with one code unit per source element; ConcatUTF32ToUnicodeStr enlarges
    it when surrogate pairs need more room, and the final SetLength trims it
    to what was actually written. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  var
    srcidx : SizeInt;
    writepos : SizeInt;
  begin
    { one slot per source element, not counting the terminating #0 }
    SetLength(result,length(s)-1);
    writepos:=1;
    for srcidx:=0 to high(s)-1 do
      ConcatUTF32ToUnicodeStr(s[srcidx],result,writepos);
    { drop the unused tail left over from growing }
    setlength(result,writepos-1);
  end;
  { Converts a WideString to a zero-terminated UCS4String.

    Each code unit or surrogate pair is converted with utf16toutf32. The
    result is first sized for the worst case (one element per code unit plus
    the terminator) and trimmed afterwards, because merged surrogate pairs
    need fewer elements. The element after the last converted one is never
    written and therefore still holds the zero it was created with; it
    serves as terminator. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
  var
    srcpos,
    srclen,
    count : SizeInt;
    units : longint;
  begin
    srclen:=length(s);
    setlength(result,srclen+1);
    count:=0;
    srcpos:=1;
    while srcpos<=srclen do
      begin
        result[count]:=utf16toutf32(s,srcpos,units);
        inc(srcpos,units);
        inc(count);
      end;
    { keep exactly the converted elements plus the terminating zero }
    setlength(result,count+1);
  end;
  { Stores the UTF-32 character nc into the WideString S at position index
    and advances index past what was written.
    S *must* be unique when entering.

    nc    : code point to store.
    S     : destination; it is enlarged when index (plus one more unit for
            code points above $FFFF) lies beyond its current length. The
            caller is expected to trim S to index-1 when done.
    index : 1-based write position; incremented by 1 or 2.

    Code points up to and including $FFFF are stored as one code unit,
    $10000..$10FFFF as a surrogate pair, anything larger as '?'.

    Fix: the single-unit test used to be nc<$ffff, which sent U+FFFF into the
    surrogate-pair branch. That wrote two garbage code units although the
    capacity check above reserves only one unit for it. }
  procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
  var
    p : PWideChar;
  begin
    { if nc > $ffff, we need two places }
    if (index+ord(nc > $ffff)>length(s)) then
      if (length(s) < 10*256) then
        setlength(s,length(s)+10)
      else
        setlength(s,length(s)+length(s) shr 8);
    { we know that s is unique -> avoid uniquestring calls}
    p:=@s[index];
    if (nc<=$ffff) then
      begin
        p^:=widechar(nc);
        inc(index);
      end
    else if (dword(nc)<=$10ffff) then
      begin
        p^:=widechar((nc - $10000) shr 10 + $d800);
        (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
        inc(index,2);
      end
    else
      { invalid code point }
      begin
        p^:='?';
        inc(index);
      end;
  end;
  { Converts a zero-terminated UCS4String to a WideString.

    The last element of s (the terminator) is skipped. The result starts
    with one code unit per source element; ConcatUTF32ToWideStr enlarges it
    when surrogate pairs need more room, and the final SetLength trims it to
    what was actually written. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
  var
    srcidx : SizeInt;
    writepos : SizeInt;
  begin
    { one slot per source element, not counting the terminating #0 }
    SetLength(result,length(s)-1);
    writepos:=1;
    for srcidx:=0 to high(s)-1 do
      ConcatUTF32ToWideStr(s[srcidx],result,writepos);
    { drop the unused tail left over from growing }
    setlength(result,writepos-1);
  end;
  { Messages printed by unimplementedunicodestring. }
  const
    SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';

  { Reports use of a unicode string operation for which no implementation is
    installed: when console I/O is compiled in and the program is a console
    application, both messages above are written to StdErr; in every case
    run-time error 233 is raised for the caller's frame. }
  procedure unimplementedunicodestring;
  begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
    if IsConsole then
      begin
        Writeln(StdErr,SNoUnicodestrings);
        Writeln(StdErr,SRecompileWithUnicodestrings);
      end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
    HandleErrorFrame(233,get_frame);
  end;
  { Returns the element size recorded in the header of S, or
    SizeOf(UnicodeChar) for an empty (nil) string, which has no header. }
  function StringElementSize(const S: UnicodeString): Word; overload;
  begin
    if Pointer(S)=nil then
      Result:=SizeOf(UnicodeChar)
    else
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
  end;
  { Returns the reference count recorded in the header of S, or 0 for an
    empty (nil) string, which has no header. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
  begin
    if Pointer(S)=nil then
      Result:=0
    else
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
  end;
  { Returns the code page of S. When FPC_HAS_CPSTRING is defined and S is not
    empty, the value stored in the string header is returned; in all other
    cases the result is DefaultUnicodeCodePage. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  begin
    Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
    if Pointer(S)<>nil then
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
  end;
  2348. {$warnings off}
  { Placeholder installed as upper/lower case conversion routine by
    initunicodestringmanager. It performs no conversion: it only calls
    unimplementedunicodestring and never assigns a result. }
  function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring;
    end;
  { Placeholder installed as string comparison routine by
    initunicodestringmanager. It performs no comparison: it only calls
    unimplementedunicodestring and never assigns a result. }
  function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
    begin
      unimplementedunicodestring;
    end;
  { Placeholder installed as text comparison routine by
    initunicodestringmanager. It performs no comparison: it only calls
    unimplementedunicodestring and never assigns a result. }
  function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
    begin
      unimplementedunicodestring;
    end;
  2361. {$warnings on}
  { Fills widestringmanager with the default routines of this file.

    - The unicode comparison hooks are always set to the placeholder
      routines CompareUnicodeString / CompareTextUnicodeString.
    - Unless HAS_WIDESTRINGMANAGER is defined, the ansi<->unicode move hooks
      and the upper/lower case hooks are set as well.
    - When FPC_WIDESTRING_EQUAL_UNICODESTRING is defined, the corresponding
      widestring hooks receive the same routines, and the character length /
      code point length hooks are set to their defaults.

    The assignments are independent of each other; they are grouped by the
    conditional symbol that controls them. }
  procedure initunicodestringmanager;
  begin
    { unicodestring comparison: always installed }
    widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
    widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  {$ifndef HAS_WIDESTRINGMANAGER}
    { unicodestring conversion and case mapping }
    widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
    widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
    widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
    widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
    { widestring comparison and length helpers }
    widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
    widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
    widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
    widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  {$ifndef HAS_WIDESTRINGMANAGER}
    { widestring conversion and case mapping }
    widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
    widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
    widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
    widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  end;