ustrings.inc 71 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply
  20. with sizeof(UnicodeChar) to convert. This is needed to be compatible with Delphi and
  21. Windows COM BSTR.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  28. Type
  29. PUnicodeRec = ^TUnicodeRec;
  30. TUnicodeRec = Packed Record
  31. CodePage : TSystemCodePage;
  32. ElementSize : Word;
  33. {$ifdef CPU64}
  34. { align fields }
  35. Dummy : DWord;
  36. {$endif CPU64}
  37. Ref : SizeInt;
  38. Len : SizeInt;
  39. First : UnicodeChar;
  40. end;
  41. Const
  42. UnicodeRecLen = SizeOf(TUnicodeRec);
  43. UnicodeFirstOff = SizeOf(TUnicodeRec)-sizeof(UnicodeChar);
  44. {
  45. Default UnicodeChar <-> Char conversion is to only convert the
  46. lower 127 chars, all others are translated to '?'.
  47. These routines can be overwritten for the Current Locale
  48. }
  49. procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:ansistring;cp : TSystemCodePage;len:SizeInt);
  50. var
  51. i : SizeInt;
  52. p : PAnsiChar;
  53. begin
  54. setlength(dest,len);
  55. p:=pointer(dest); {SetLength guarantees that dest is unique}
  56. for i:=1 to len do
  57. begin
  58. if word(source^)<256 then
  59. p^:=char(word(source^))
  60. else
  61. p^:='?';
  62. inc(source);
  63. inc(p);
  64. end;
  65. end;
  66. procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  67. var
  68. i : SizeInt;
  69. p : PUnicodeChar;
  70. begin
  71. setlength(dest,len);
  72. p:=pointer(dest); {SetLength guarantees that dest is unique}
  73. for i:=1 to len do
  74. begin
  75. p^:=unicodechar(byte(source^));
  76. inc(source);
  77. inc(p);
  78. end;
  79. end;
  80. function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  81. begin
  82. DefaultCharLengthPChar:=length(Str);
  83. end;
  84. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  85. begin
  86. if str[0]<>#0 then
  87. DefaultCodePointLength:=1
  88. else
  89. DefaultCodePointLength:=0;
  90. end;
  91. Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  92. begin
  93. manager:=widestringmanager;
  94. end;
  95. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  96. begin
  97. Old:=widestringmanager;
  98. widestringmanager:=New;
  99. end;
  100. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  101. begin
  102. widestringmanager:=New;
  103. end;
  104. Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  105. begin
  106. manager:=widestringmanager;
  107. end;
  108. Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  109. begin
  110. Old:=widestringmanager;
  111. widestringmanager:=New;
  112. end;
  113. Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  114. begin
  115. widestringmanager:=New;
  116. end;
  117. {****************************************************************************
  118. Internal functions, not in interface.
  119. ****************************************************************************}
  120. procedure UnicodeStringError;
  121. begin
  122. HandleErrorFrame(204,get_frame);
  123. end;
  124. {$ifdef UnicodeStrDebug}
  125. Procedure DumpUnicodeRec(S : Pointer);
  126. begin
  127. If S=Nil then
  128. Writeln ('String is nil')
  129. Else
  130. Begin
  131. With PUnicodeRec(S-UnicodeFirstOff)^ do
  132. begin
  133. Write ('(Len:',len);
  134. Writeln (' Ref: ',ref,')');
  135. end;
  136. end;
  137. end;
  138. {$endif}
  139. Function NewUnicodeString(Len : SizeInt) : Pointer;
  140. {
  141. Allocate a new UnicodeString on the heap.
  142. initialize it to zero length and reference count 1.
  143. }
  144. Var
  145. P : Pointer;
  146. begin
  147. GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
  148. If P<>Nil then
  149. begin
  150. PUnicodeRec(P)^.Len:=Len*2; { Initial length }
  151. PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
  152. PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
  153. PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);
  154. PUnicodeRec(P)^.First:=#0; { Terminating #0 }
  155. inc(p,UnicodeFirstOff); { Points to string now }
  156. end
  157. else
  158. UnicodeStringError;
  159. NewUnicodeString:=P;
  160. end;
  161. Procedure DisposeUnicodeString(Var S : Pointer);
  162. {
  163. Deallocates a UnicodeString From the heap.
  164. }
  165. begin
  166. If S=Nil then
  167. exit;
  168. Dec (S,UnicodeFirstOff);
  169. Freemem(S);
  170. S:=Nil;
  171. end;
  172. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  173. {
  174. Decreases the ReferenceCount of a non constant unicodestring;
  175. If the reference count is zero, deallocate the string;
  176. }
  177. Type
  178. pSizeInt = ^SizeInt;
  179. Var
  180. l : pSizeInt;
  181. Begin
  182. { Zero string }
  183. if S=Nil then
  184. exit;
  185. { check for constant strings ...}
  186. l:=@PUnicodeRec(S-UnicodeFirstOff)^.Ref;
  187. if l^<0 then
  188. exit;
  189. { declocked does a MT safe dec and returns true, if the counter is 0 }
  190. if declocked(l^) then
  191. { Ref count dropped to zero ...
  192. ... remove }
  193. DisposeUnicodeString(S);
  194. end;
  195. { alias for internal use }
  196. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  197. Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  198. Begin
  199. If S=Nil then
  200. exit;
  201. { constant string ? }
  202. If PUnicodeRec(S-UnicodeFirstOff)^.Ref<0 then
  203. exit;
  204. inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
  205. end;
  206. { alias for internal use }
  207. Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  208. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  209. function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
  210. {
  211. Converts a UnicodeString to a ShortString;
  212. }
  213. Var
  214. Size : SizeInt;
  215. temp : ansistring;
  216. begin
  217. result:='';
  218. Size:=Length(S2);
  219. if Size>0 then
  220. begin
  221. If Size>high_of_res then
  222. Size:=high_of_res;
  223. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,Size);
  224. result:=temp;
  225. end;
  226. end;
  227. {$else FPC_STRTOSHORTSTRINGPROC}
  228. procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  229. {
  230. Converts a UnicodeString to a ShortString;
  231. }
  232. Var
  233. Size : SizeInt;
  234. temp : ansistring;
  235. begin
  236. res:='';
  237. Size:=Length(S2);
  238. if Size>0 then
  239. begin
  240. If Size>high(res) then
  241. Size:=high(res);
  242. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
  243. res:=temp;
  244. end;
  245. end;
  246. {$endif FPC_STRTOSHORTSTRINGPROC}
  247. Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  248. {
  249. Converts a ShortString to a UnicodeString;
  250. }
  251. Var
  252. Size : SizeInt;
  253. begin
  254. result:='';
  255. Size:=Length(S2);
  256. if Size>0 then
  257. begin
  258. widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  259. { Terminating Zero }
  260. PUnicodeChar(Pointer(fpc_ShortStr_To_UnicodeStr)+Size*sizeof(UnicodeChar))^:=#0;
  261. end;
  262. end;
  263. Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString;cp : TSystemCodePage): AnsiString; compilerproc;
  264. {
  265. Converts a UnicodeString to an AnsiString
  266. }
  267. Var
  268. Size : SizeInt;
  269. begin
  270. result:='';
  271. Size:=Length(S2);
  272. if Size>0 then
  273. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,Size);
  274. end;
  275. Function fpc_AnsiStr_To_UnicodeStr (Const S2 : AnsiString): UnicodeString; compilerproc;
  276. {
  277. Converts an AnsiString to a UnicodeString;
  278. }
  279. Var
  280. Size : SizeInt;
  281. begin
  282. result:='';
  283. Size:=Length(S2);
  284. if Size>0 then
  285. widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),StringCodePage(S2),result,Size);
  286. end;
  287. Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  288. begin
  289. SetLength(Result,Length(S2));
  290. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  291. end;
  292. Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  293. begin
  294. SetLength(Result,Length(S2));
  295. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  296. end;
  297. Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar;cp : TSystemCodePage): ansistring; compilerproc;
  298. var
  299. Size : SizeInt;
  300. begin
  301. result:='';
  302. if p=nil then
  303. exit;
  304. Size := IndexWord(p^, -1, 0);
  305. if Size>0 then
  306. widestringmanager.Unicode2AnsiMoveProc(P,result,cp,Size);
  307. end;
  308. Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  309. var
  310. Size : SizeInt;
  311. begin
  312. result:='';
  313. if p=nil then
  314. exit;
  315. Size := IndexWord(p^, -1, 0);
  316. Setlength(result,Size);
  317. if Size>0 then
  318. begin
  319. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  320. { Terminating Zero }
  321. PUnicodeChar(Pointer(result)+Size*sizeof(UnicodeChar))^:=#0;
  322. end;
  323. end;
  324. Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  325. var
  326. Size : SizeInt;
  327. begin
  328. result:='';
  329. if p=nil then
  330. exit;
  331. Size := IndexWord(p^, -1, 0);
  332. Setlength(result,Size);
  333. if Size>0 then
  334. begin
  335. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  336. { Terminating Zero }
  337. PUnicodeChar(Pointer(result)+Size*sizeof(UnicodeChar))^:=#0;
  338. end;
  339. end;
  340. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  341. Function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
  342. var
  343. Size : SizeInt;
  344. temp: ansistring;
  345. begin
  346. result:='';
  347. if p=nil then
  348. exit;
  349. Size := IndexWord(p^, $7fffffff, 0);
  350. if Size>0 then
  351. begin
  352. widestringmanager.Unicode2AnsiMoveProc(p,temp,Size);
  353. result:=temp;
  354. end;
  355. end;
  356. {$else FPC_STRTOSHORTSTRINGPROC}
  357. procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  358. var
  359. Size : SizeInt;
  360. temp: ansistring;
  361. begin
  362. res:='';
  363. if p=nil then
  364. exit;
  365. Size:=IndexWord(p^, high(PtrInt), 0);
  366. if Size>0 then
  367. begin
  368. widestringmanager.Unicode2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  369. res:=temp;
  370. end;
  371. end;
  372. {$endif FPC_STRTOSHORTSTRINGPROC}
  373. Function fpc_PWideChar_To_AnsiStr(const p : pwidechar;cp : TSystemCodePage): ansistring; compilerproc;
  374. var
  375. Size : SizeInt;
  376. begin
  377. result:='';
  378. if p=nil then
  379. exit;
  380. Size := IndexWord(p^, -1, 0);
  381. if Size>0 then
  382. widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
  383. end;
  384. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  385. Function fpc_PWideChar_To_ShortStr(const p : pwidechar): shortstring; compilerproc;
  386. var
  387. Size : SizeInt;
  388. temp: ansistring;
  389. begin
  390. result:='';
  391. if p=nil then
  392. exit;
  393. Size := IndexWord(p^, $7fffffff, 0);
  394. if Size>0 then
  395. begin
  396. widestringmanager.Wide2AnsiMoveProc(p,temp,Size);
  397. result:=temp;
  398. end;
  399. end;
  400. {$else FPC_STRTOSHORTSTRINGPROC}
  401. procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  402. var
  403. Size : SizeInt;
  404. temp: ansistring;
  405. begin
  406. res:='';
  407. if p=nil then
  408. exit;
  409. Size:=IndexWord(p^, high(PtrInt), 0);
  410. if Size>0 then
  411. begin
  412. widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  413. res:=temp;
  414. end;
  415. end;
  416. {$endif FPC_STRTOSHORTSTRINGPROC}
  417. { checked against the ansistring routine, 2001-05-27 (FK) }
  418. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  419. {
  420. Assigns S2 to S1 (S1:=S2), taking in account reference counts.
  421. }
  422. begin
  423. If S2<>nil then
  424. If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
  425. inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
  426. { Decrease the reference count on the old S1 }
  427. fpc_unicodestr_decr_ref (S1);
  428. s1:=s2;
  429. end;
  430. { alias for internal use }
  431. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  432. {$ifndef STR_CONCAT_PROCS}
  433. function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  434. Var
  435. Size,Location : SizeInt;
  436. pc : punicodechar;
  437. begin
  438. { only assign if s1 or s2 is empty }
  439. if (S1='') then
  440. begin
  441. result:=s2;
  442. exit;
  443. end;
  444. if (S2='') then
  445. begin
  446. result:=s1;
  447. exit;
  448. end;
  449. Location:=Length(S1);
  450. Size:=length(S2);
  451. SetLength(result,Size+Location);
  452. pc:=punicodechar(result);
  453. Move(S1[1],pc^,Location*sizeof(UnicodeChar));
  454. inc(pc,location);
  455. Move(S2[1],pc^,(Size+1)*sizeof(UnicodeChar));
  456. end;
  457. function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  458. Var
  459. i : Longint;
  460. p : pointer;
  461. pc : punicodechar;
  462. Size,NewSize : SizeInt;
  463. begin
  464. { First calculate size of the result so we can do
  465. a single call to SetLength() }
  466. NewSize:=0;
  467. for i:=low(sarr) to high(sarr) do
  468. inc(Newsize,length(sarr[i]));
  469. SetLength(result,NewSize);
  470. pc:=punicodechar(result);
  471. for i:=low(sarr) to high(sarr) do
  472. begin
  473. p:=pointer(sarr[i]);
  474. if assigned(p) then
  475. begin
  476. Size:=length(unicodestring(p));
  477. Move(punicodechar(p)^,pc^,(Size+1)*sizeof(UnicodeChar));
  478. inc(pc,size);
  479. end;
  480. end;
  481. end;
  482. {$else STR_CONCAT_PROCS}
  483. procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  484. Var
  485. Size,Location : SizeInt;
  486. same : boolean;
  487. begin
  488. { only assign if s1 or s2 is empty }
  489. if (S1='') then
  490. begin
  491. DestS:=s2;
  492. exit;
  493. end;
  494. if (S2='') then
  495. begin
  496. DestS:=s1;
  497. exit;
  498. end;
  499. Location:=Length(S1);
  500. Size:=length(S2);
  501. { Use Pointer() typecasts to prevent extra conversion code }
  502. if Pointer(DestS)=Pointer(S1) then
  503. begin
  504. same:=Pointer(S1)=Pointer(S2);
  505. SetLength(DestS,Size+Location);
  506. if same then
  507. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size)*sizeof(UnicodeChar))
  508. else
  509. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  510. end
  511. else if Pointer(DestS)=Pointer(S2) then
  512. begin
  513. SetLength(DestS,Size+Location);
  514. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  515. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  516. end
  517. else
  518. begin
  519. DestS:='';
  520. SetLength(DestS,Size+Location);
  521. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  522. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  523. end;
  524. end;
  525. procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  526. Var
  527. i : Longint;
  528. p,pc : pointer;
  529. Size,NewLen : SizeInt;
  530. lowstart : longint;
  531. destcopy : pointer;
  532. OldDestLen : SizeInt;
  533. begin
  534. if high(sarr)=0 then
  535. begin
  536. DestS:='';
  537. exit;
  538. end;
  539. destcopy:=nil;
  540. lowstart:=low(sarr);
  541. if Pointer(DestS)=Pointer(sarr[lowstart]) then
  542. inc(lowstart);
  543. { Check for another reuse, then we can't use
  544. the append optimization }
  545. for i:=lowstart to high(sarr) do
  546. begin
  547. if Pointer(DestS)=Pointer(sarr[i]) then
  548. begin
  549. { if DestS is used somewhere in the middle of the expression,
  550. we need to make sure the original string still exists after
  551. we empty/modify DestS.
  552. This trick only works with reference counted strings. Therefor
  553. this optimization is disabled for WINLIKEUNICODESTRING }
  554. destcopy:=pointer(dests);
  555. fpc_UnicodeStr_Incr_Ref(destcopy);
  556. lowstart:=low(sarr);
  557. break;
  558. end;
  559. end;
  560. { Start with empty DestS if we start with concatting
  561. the first array element }
  562. if lowstart=low(sarr) then
  563. DestS:='';
  564. OldDestLen:=length(DestS);
  565. { Calculate size of the result so we can do
  566. a single call to SetLength() }
  567. NewLen:=0;
  568. for i:=low(sarr) to high(sarr) do
  569. inc(NewLen,length(sarr[i]));
  570. SetLength(DestS,NewLen);
  571. { Concat all strings, except the string we already
  572. copied in DestS }
  573. pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
  574. for i:=lowstart to high(sarr) do
  575. begin
  576. p:=pointer(sarr[i]);
  577. if assigned(p) then
  578. begin
  579. Size:=length(unicodestring(p));
  580. Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
  581. inc(pc,size*sizeof(UnicodeChar));
  582. end;
  583. end;
  584. fpc_UnicodeStr_Decr_Ref(destcopy);
  585. end;
  586. {$endif STR_CONCAT_PROCS}
  587. Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  588. var
  589. w: unicodestring;
  590. begin
  591. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
  592. fpc_Char_To_UChar:=w[1];
  593. end;
  594. Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  595. {
  596. Converts a Char to a UnicodeString;
  597. }
  598. begin
  599. Setlength(fpc_Char_To_UnicodeStr,1);
  600. fpc_Char_To_UnicodeStr[1]:=c;
  601. { Terminating Zero }
  602. PUnicodeChar(Pointer(fpc_Char_To_UnicodeStr)+sizeof(UnicodeChar))^:=#0;
  603. end;
  604. Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  605. {
  606. Converts a UnicodeChar to a Char;
  607. }
  608. var
  609. s: ansistring;
  610. begin
  611. widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  612. if length(s)=1 then
  613. fpc_UChar_To_Char:= s[1]
  614. else
  615. fpc_UChar_To_Char:='?';
  616. end;
  617. Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  618. {
  619. Converts a WideChar to a UnicodeString;
  620. }
  621. begin
  622. Setlength (Result,1);
  623. Result[1]:= c;
  624. end;
  625. Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
  626. var
  627. w: widestring;
  628. begin
  629. widestringmanager.Ansi2WideMoveProc(@c,DefaultSystemCodePage,w,1);
  630. fpc_Char_To_WChar:=w[1];
  631. end;
  632. Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
  633. {
  634. Converts a WideChar to a Char;
  635. }
  636. var
  637. s: ansistring;
  638. begin
  639. widestringmanager.Wide2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  640. if length(s)=1 then
  641. fpc_WChar_To_Char:= s[1]
  642. else
  643. fpc_WChar_To_Char:='?';
  644. end;
  645. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  646. Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
  647. {
  648. Converts a WideChar to a ShortString;
  649. }
  650. var
  651. s: ansistring;
  652. begin
  653. widestringmanager.Wide2AnsiMoveProc(@c, s, 1);
  654. fpc_WChar_To_ShortStr:= s;
  655. end;
  656. {$else FPC_STRTOSHORTSTRINGPROC}
  657. procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  658. {
  659. Converts a WideChar to a ShortString;
  660. }
  661. var
  662. s: ansistring;
  663. begin
  664. widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  665. res:=s;
  666. end;
  667. {$endif FPC_STRTOSHORTSTRINGPROC}
  668. Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  669. {
  670. Converts a UnicodeChar to a UnicodeString;
  671. }
  672. begin
  673. Setlength (fpc_UChar_To_UnicodeStr,1);
  674. fpc_UChar_To_UnicodeStr[1]:= c;
  675. end;
  676. Function fpc_UChar_To_AnsiStr(const c : UnicodeChar;cp : TSystemCodePage): AnsiString; compilerproc;
  677. {
  678. Converts a UnicodeChar to a AnsiString;
  679. }
  680. begin
  681. widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
  682. end;
  683. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  684. Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  685. {
  686. Converts a UnicodeChar to a ShortString;
  687. }
  688. var
  689. s: ansistring;
  690. begin
  691. widestringmanager.Unicode2AnsiMoveProc(@c, s, 1);
  692. fpc_UChar_To_ShortStr:= s;
  693. end;
  694. {$else FPC_STRTOSHORTSTRINGPROC}
  695. procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  696. {
  697. Converts a UnicodeChar to a ShortString;
  698. }
  699. var
  700. s: ansistring;
  701. begin
  702. widestringmanager.Unicode2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  703. res:=s;
  704. end;
  705. {$endif FPC_STRTOSHORTSTRINGPROC}
  706. Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  707. Var
  708. L : SizeInt;
  709. begin
  710. if (not assigned(p)) or (p[0]=#0) Then
  711. begin
  712. fpc_pchar_to_unicodestr := '';
  713. exit;
  714. end;
  715. l:=IndexChar(p^,-1,#0);
  716. widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,l);
  717. end;
  718. Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  719. var
  720. i : SizeInt;
  721. begin
  722. if zerobased then
  723. begin
  724. if arr[0]=#0 Then
  725. begin
  726. fpc_chararray_to_unicodestr:='';
  727. exit;
  728. end;
  729. i:=IndexChar(arr,high(arr)+1,#0);
  730. if i=-1 then
  731. i:=high(arr)+1;
  732. end
  733. else
  734. i:=high(arr)+1;
  735. SetLength(fpc_CharArray_To_UnicodeStr,i);
  736. widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
  737. end;
  738. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  739. function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  740. var
  741. l: longint;
  742. index: longint;
  743. len: byte;
  744. temp: ansistring;
  745. begin
  746. l := high(arr)+1;
  747. if l>=256 then
  748. l:=255
  749. else if l<0 then
  750. l:=0;
  751. if zerobased then
  752. begin
  753. index:=IndexWord(arr[0],l,0);
  754. if (index < 0) then
  755. len := l
  756. else
  757. len := index;
  758. end
  759. else
  760. len := l;
  761. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,len);
  762. fpc_UnicodeCharArray_To_ShortStr := temp;
  763. end;
  764. {$else FPC_STRTOSHORTSTRINGPROC}
  765. procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  766. var
  767. l: longint;
  768. index: ptrint;
  769. len: byte;
  770. temp: ansistring;
  771. begin
  772. l := high(arr)+1;
  773. if l>=high(res)+1 then
  774. l:=high(res)
  775. else if l<0 then
  776. l:=0;
  777. if zerobased then
  778. begin
  779. index:=IndexWord(arr[0],l,0);
  780. if index<0 then
  781. len:=l
  782. else
  783. len:=index;
  784. end
  785. else
  786. len:=l;
  787. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,DefaultSystemCodePage,len);
  788. res:=temp;
  789. end;
  790. {$endif FPC_STRTOSHORTSTRINGPROC}
  791. Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; cp : TSystemCodePage;zerobased: boolean = true): AnsiString; compilerproc;
  792. var
  793. i : SizeInt;
  794. begin
  795. if (zerobased) then
  796. begin
  797. i:=IndexWord(arr,high(arr)+1,0);
  798. if i = -1 then
  799. i := high(arr)+1;
  800. end
  801. else
  802. i := high(arr)+1;
  803. SetLength(fpc_UnicodeCharArray_To_AnsiStr,i);
  804. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,cp,i);
  805. end;
  806. Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  807. var
  808. i : SizeInt;
  809. begin
  810. if (zerobased) then
  811. begin
  812. i:=IndexWord(arr,high(arr)+1,0);
  813. if i = -1 then
  814. i := high(arr)+1;
  815. end
  816. else
  817. i := high(arr)+1;
  818. SetLength(fpc_UnicodeCharArray_To_UnicodeStr,i);
  819. Move(arr[0], Pointer(fpc_UnicodeCharArray_To_UnicodeStr)^,i*sizeof(UnicodeChar));
  820. end;
  821. Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  822. var
  823. i : SizeInt;
  824. begin
  825. if (zerobased) then
  826. begin
  827. i:=IndexWord(arr,high(arr)+1,0);
  828. if i = -1 then
  829. i := high(arr)+1;
  830. end
  831. else
  832. i := high(arr)+1;
  833. SetLength(fpc_WideCharArray_To_UnicodeStr,i);
  834. Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,i*sizeof(WideChar));
  835. end;
  836. { due to their names, the following procedures should be in wstrings.inc,
  837. however, the compiler generates code using this functions on all platforms }
  838. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  839. function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  840. var
  841. l: longint;
  842. index: longint;
  843. len: byte;
  844. temp: ansistring;
  845. begin
  846. l := high(arr)+1;
  847. if l>=256 then
  848. l:=255
  849. else if l<0 then
  850. l:=0;
  851. if zerobased then
  852. begin
  853. index:=IndexWord(arr[0],l,0);
  854. if (index < 0) then
  855. len := l
  856. else
  857. len := index;
  858. end
  859. else
  860. len := l;
  861. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
  862. fpc_WideCharArray_To_ShortStr := temp;
  863. end;
  864. {$else FPC_STRTOSHORTSTRINGPROC}
  865. procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  866. var
  867. l: longint;
  868. index: ptrint;
  869. len: byte;
  870. temp: ansistring;
  871. begin
  872. l := high(arr)+1;
  873. if l>=high(res)+1 then
  874. l:=high(res)
  875. else if l<0 then
  876. l:=0;
  877. if zerobased then
  878. begin
  879. index:=IndexWord(arr[0],l,0);
  880. if index<0 then
  881. len:=l
  882. else
  883. len:=index;
  884. end
  885. else
  886. len:=l;
  887. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
  888. res:=temp;
  889. end;
  890. {$endif FPC_STRTOSHORTSTRINGPROC}
  891. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; cp : TSystemCodePage; zerobased: boolean = true): AnsiString; compilerproc;
  892. var
  893. i : SizeInt;
  894. begin
  895. if (zerobased) then
  896. begin
  897. i:=IndexWord(arr,high(arr)+1,0);
  898. if i = -1 then
  899. i := high(arr)+1;
  900. end
  901. else
  902. i := high(arr)+1;
  903. SetLength(fpc_WideCharArray_To_AnsiStr,i);
  904. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,cp,i);
  905. end;
  906. Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  907. var
  908. i : SizeInt;
  909. begin
  910. if (zerobased) then
  911. begin
  912. i:=IndexWord(arr,high(arr)+1,0);
  913. if i = -1 then
  914. i := high(arr)+1;
  915. end
  916. else
  917. i := high(arr)+1;
  918. SetLength(fpc_WideCharArray_To_WideStr,i);
  919. Move(arr[0], Pointer(fpc_WideCharArray_To_WideStr)^,i*sizeof(WideChar));
  920. end;
  921. {$ifndef FPC_STRTOCHARARRAYPROC}
  922. { inside the compiler, the resulttype is modified to that of the actual }
  923. { chararray we're converting to (JM) }
  924. function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
  925. var
  926. len: SizeInt;
  927. temp: ansistring;
  928. begin
  929. len := length(src);
  930. { make sure we don't dereference src if it can be nil (JM) }
  931. if len > 0 then
  932. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,len);
  933. len := length(temp);
  934. if len > arraysize then
  935. len := arraysize;
  936. {$r-}
  937. move(temp[1],fpc_unicodestr_to_chararray[0],len);
  938. fillchar(fpc_unicodestr_to_chararray[len],arraysize-len,0);
  939. {$ifdef RangeCheckWasOn}
  940. {$r+}
  941. {$endif}
  942. end;
  943. { inside the compiler, the resulttype is modified to that of the actual }
  944. { unicodechararray we're converting to (JM) }
  945. function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
  946. var
  947. len: SizeInt;
  948. begin
  949. len := length(src);
  950. if len > arraysize then
  951. len := arraysize;
  952. {$r-}
  953. { make sure we don't try to access element 1 of the ansistring if it's nil }
  954. if len > 0 then
  955. move(src[1],fpc_unicodestr_to_unicodechararray[0],len*SizeOf(UnicodeChar));
  956. fillchar(fpc_unicodestr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  957. {$ifdef RangeCheckWasOn}
  958. {$r+}
  959. {$endif}
  960. end;
  961. { inside the compiler, the resulttype is modified to that of the actual }
  962. { chararray we're converting to (JM) }
  963. function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
  964. var
  965. len: SizeInt;
  966. temp: unicodestring;
  967. begin
  968. len := length(src);
  969. { make sure we don't dereference src if it can be nil (JM) }
  970. if len > 0 then
  971. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  972. len := length(temp);
  973. if len > arraysize then
  974. len := arraysize;
  975. {$r-}
  976. move(temp[1],fpc_ansistr_to_unicodechararray[0],len*sizeof(unicodechar));
  977. fillchar(fpc_ansistr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  978. {$ifdef RangeCheckWasOn}
  979. {$r+}
  980. {$endif}
  981. end;
  982. function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
  983. var
  984. len: longint;
  985. temp : unicodestring;
  986. begin
  987. len := length(src);
  988. { make sure we don't access char 1 if length is 0 (JM) }
  989. if len > 0 then
  990. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  991. len := length(temp);
  992. if len > arraysize then
  993. len := arraysize;
  994. {$r-}
  995. move(temp[1],fpc_shortstr_to_unicodechararray[0],len*sizeof(unicodechar));
  996. fillchar(fpc_shortstr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  997. {$ifdef RangeCheckWasOn}
  998. {$r+}
  999. {$endif}
  1000. end;
  1001. {$else ndef FPC_STRTOCHARARRAYPROC}
  1002. procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  1003. var
  1004. len: SizeInt;
  1005. temp: ansistring;
  1006. begin
  1007. len := length(src);
  1008. { make sure we don't dereference src if it can be nil (JM) }
  1009. if len > 0 then
  1010. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,len);
  1011. len := length(temp);
  1012. if len > length(res) then
  1013. len := length(res);
  1014. {$r-}
  1015. move(temp[1],res[0],len);
  1016. fillchar(res[len],length(res)-len,0);
  1017. {$ifdef RangeCheckWasOn}
  1018. {$r+}
  1019. {$endif}
  1020. end;
  1021. procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  1022. var
  1023. len: SizeInt;
  1024. begin
  1025. len := length(src);
  1026. if len > length(res) then
  1027. len := length(res);
  1028. {$r-}
  1029. { make sure we don't try to access element 1 of the ansistring if it's nil }
  1030. if len > 0 then
  1031. move(src[1],res[0],len*SizeOf(UnicodeChar));
  1032. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  1033. {$ifdef RangeCheckWasOn}
  1034. {$r+}
  1035. {$endif}
  1036. end;
  1037. procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  1038. var
  1039. len: SizeInt;
  1040. temp: unicodestring;
  1041. begin
  1042. len := length(src);
  1043. { make sure we don't dereference src if it can be nil (JM) }
  1044. if len > 0 then
  1045. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  1046. len := length(temp);
  1047. if len > length(res) then
  1048. len := length(res);
  1049. {$r-}
  1050. move(temp[1],res[0],len*sizeof(unicodechar));
  1051. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  1052. {$ifdef RangeCheckWasOn}
  1053. {$r+}
  1054. {$endif}
  1055. end;
  1056. procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  1057. var
  1058. len: longint;
  1059. temp : unicodestring;
  1060. begin
  1061. len := length(src);
  1062. { make sure we don't access char 1 if length is 0 (JM) }
  1063. if len > 0 then
  1064. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  1065. len := length(temp);
  1066. if len > length(res) then
  1067. len := length(res);
  1068. {$r-}
  1069. move(temp[1],res[0],len*sizeof(unicodechar));
  1070. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  1071. {$ifdef RangeCheckWasOn}
  1072. {$r+}
  1073. {$endif}
  1074. end;
  1075. procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: AnsiString); compilerproc;
  1076. var
  1077. len: SizeInt;
  1078. temp: widestring;
  1079. begin
  1080. len := length(src);
  1081. { make sure we don't dereference src if it can be nil (JM) }
  1082. if len > 0 then
  1083. widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),temp,len);
  1084. len := length(temp);
  1085. if len > length(res) then
  1086. len := length(res);
  1087. {$r-}
  1088. move(temp[1],res[0],len*sizeof(widechar));
  1089. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  1090. {$ifdef RangeCheckWasOn}
  1091. {$r+}
  1092. {$endif}
  1093. end;
  1094. procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  1095. var
  1096. len: longint;
  1097. temp : widestring;
  1098. begin
  1099. len := length(src);
  1100. { make sure we don't access char 1 if length is 0 (JM) }
  1101. if len > 0 then
  1102. widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
  1103. len := length(temp);
  1104. if len > length(res) then
  1105. len := length(res);
  1106. {$r-}
  1107. move(temp[1],res[0],len*sizeof(widechar));
  1108. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  1109. {$ifdef RangeCheckWasOn}
  1110. {$r+}
  1111. {$endif}
  1112. end;
  1113. procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  1114. var
  1115. len: SizeInt;
  1116. begin
  1117. len := length(src);
  1118. if len > length(res) then
  1119. len := length(res);
  1120. {$r-}
  1121. { make sure we don't try to access element 1 of the widestring if it's nil }
  1122. if len > 0 then
  1123. move(src[1],res[0],len*SizeOf(WideChar));
  1124. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  1125. {$ifdef RangeCheckWasOn}
  1126. {$r+}
  1127. {$endif}
  1128. end;
  1129. {$endif ndef FPC_STRTOCHARARRAYPROC}
  1130. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  1131. {
  1132. Compares 2 UnicodeStrings;
  1133. The result is
  1134. <0 if S1<S2
  1135. 0 if S1=S2
  1136. >0 if S1>S2
  1137. }
  1138. Var
  1139. MaxI,Temp : SizeInt;
  1140. begin
  1141. if pointer(S1)=pointer(S2) then
  1142. begin
  1143. fpc_UnicodeStr_Compare:=0;
  1144. exit;
  1145. end;
  1146. Maxi:=Length(S1);
  1147. temp:=Length(S2);
  1148. If MaxI>Temp then
  1149. MaxI:=Temp;
  1150. Temp:=CompareWord(S1[1],S2[1],MaxI);
  1151. if temp=0 then
  1152. temp:=Length(S1)-Length(S2);
  1153. fpc_UnicodeStr_Compare:=Temp;
  1154. end;
  1155. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  1156. {
  1157. Compares 2 UnicodeStrings for equality only;
  1158. The result is
  1159. 0 if S1=S2
  1160. <>0 if S1<>S2
  1161. }
  1162. Var
  1163. MaxI : SizeInt;
  1164. begin
  1165. if pointer(S1)=pointer(S2) then
  1166. exit(0);
  1167. Maxi:=Length(S1);
  1168. If MaxI<>Length(S2) then
  1169. exit(-1)
  1170. else
  1171. exit(CompareWord(S1[1],S2[1],MaxI));
  1172. end;
  1173. {$ifdef VER2_4}
  1174. // obsolete but needed for bootstrapping with 2.4
  1175. Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  1176. begin
  1177. if p=nil then
  1178. HandleErrorFrame(201,get_frame);
  1179. end;
  1180. Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  1181. begin
  1182. if (index>len div 2) or (Index<1) then
  1183. HandleErrorFrame(201,get_frame);
  1184. end;
  1185. {$else VER2_4}
  1186. Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  1187. begin
  1188. if (p=nil) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len div 2) or (Index<1) then
  1189. HandleErrorFrame(201,get_frame);
  1190. end;
  1191. {$endif VER2_4}
  1192. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  1193. {
  1194. Sets The length of string S to L.
  1195. Makes sure S is unique, and contains enough room.
  1196. }
  1197. Var
  1198. Temp : Pointer;
  1199. movelen: SizeInt;
  1200. begin
  1201. if (l>0) then
  1202. begin
  1203. if Pointer(S)=nil then
  1204. begin
  1205. { Need a complete new string...}
  1206. Pointer(s):=NewUnicodeString(l);
  1207. end
  1208. else
  1209. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  1210. begin
  1211. Dec(Pointer(S),UnicodeFirstOff);
  1212. if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
  1213. reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
  1214. Inc(Pointer(S), UnicodeFirstOff);
  1215. end
  1216. else
  1217. begin
  1218. { Reallocation is needed... }
  1219. Temp:=Pointer(NewUnicodeString(L));
  1220. if Length(S)>0 then
  1221. begin
  1222. if l < succ(length(s)) then
  1223. movelen := l
  1224. { also move terminating null }
  1225. else
  1226. movelen := succ(length(s));
  1227. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  1228. end;
  1229. fpc_unicodestr_decr_ref(Pointer(S));
  1230. Pointer(S):=Temp;
  1231. end;
  1232. { Force nil termination in case it gets shorter }
  1233. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  1234. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l*sizeof(UnicodeChar);
  1235. end
  1236. else
  1237. begin
  1238. { Length=0 }
  1239. if Pointer(S)<>nil then
  1240. fpc_unicodestr_decr_ref (Pointer(S));
  1241. Pointer(S):=Nil;
  1242. end;
  1243. end;
  1244. {*****************************************************************************
  1245. Public functions, In interface.
  1246. *****************************************************************************}
  1247. function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  1248. begin
  1249. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  1250. end;
  1251. function StringToUnicodeChar(const Src : AnsiString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  1252. var
  1253. temp:unicodestring;
  1254. begin
  1255. widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  1256. if Length(temp)<DestSize then
  1257. move(temp[1],Dest^,Length(temp)*SizeOf(UnicodeChar))
  1258. else
  1259. move(temp[1],Dest^,(DestSize-1)*SizeOf(UnicodeChar));
  1260. Dest[DestSize-1]:=#0;
  1261. result:=Dest;
  1262. end;
  1263. function WideCharToString(S : PWideChar) : UnicodeString;
  1264. begin
  1265. result:=WideCharLenToString(s,Length(WideString(s)));
  1266. end;
  1267. function StringToWideChar(const Src : AnsiString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  1268. var
  1269. temp:widestring;
  1270. begin
  1271. widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
  1272. if Length(temp)<DestSize then
  1273. move(temp[1],Dest^,Length(temp)*SizeOf(WideChar))
  1274. else
  1275. move(temp[1],Dest^,(DestSize-1)*SizeOf(WideChar));
  1276. Dest[DestSize-1]:=#0;
  1277. result:=Dest;
  1278. end;
  1279. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  1280. begin
  1281. SetLength(result,Len);
  1282. Move(S^,Pointer(Result)^,Len*2);
  1283. end;
  1284. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  1285. begin
  1286. Dest:=UnicodeCharLenToString(Src,Len);
  1287. end;
  1288. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  1289. begin
  1290. Dest:=UnicodeCharLenToString(Src,Len);
  1291. end;
  1292. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  1293. begin
  1294. Dest:=UnicodeCharToString(S);
  1295. end;
  1296. function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  1297. begin
  1298. SetLength(result,Len);
  1299. Move(S^,Pointer(Result)^,Len*2);
  1300. end;
  1301. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  1302. begin
  1303. Dest:=WideCharLenToString(Src,Len);
  1304. end;
  1305. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  1306. begin
  1307. Dest:=WideCharLenToString(Src,Len);
  1308. end;
  1309. procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  1310. begin
  1311. Dest:=WideCharToString(S);
  1312. end;
  1313. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  1314. begin
  1315. Dest:=WideCharToString(S);
  1316. end;
  1317. Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  1318. {
  1319. Make sure reference count of S is 1,
  1320. using copy-on-write semantics.
  1321. }
  1322. Var
  1323. SNew : Pointer;
  1324. L : SizeInt;
  1325. begin
  1326. pointer(result) := pointer(s);
  1327. If Pointer(S)=Nil then
  1328. exit;
  1329. if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
  1330. begin
  1331. L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len div sizeof(UnicodeChar);
  1332. SNew:=NewUnicodeString (L);
  1333. Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
  1334. PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L * sizeof(UnicodeChar);
  1335. fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
  1336. pointer(S):=SNew;
  1337. pointer(result):=SNew;
  1338. end;
  1339. end;
  1340. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  1341. var
  1342. ResultAddress : Pointer;
  1343. begin
  1344. ResultAddress:=Nil;
  1345. dec(index);
  1346. if Index < 0 then
  1347. Index := 0;
  1348. { Check Size. Accounts for Zero-length S, the double check is needed because
  1349. Size can be maxint and will get <0 when adding index }
  1350. if (Size>Length(S)) or
  1351. (Index+Size>Length(S)) then
  1352. Size:=Length(S)-Index;
  1353. If Size>0 then
  1354. begin
  1355. If Index<0 Then
  1356. Index:=0;
  1357. ResultAddress:=Pointer(NewUnicodeString (Size));
  1358. if ResultAddress<>Nil then
  1359. begin
  1360. Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
  1361. PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size*sizeof(UnicodeChar);
  1362. PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
  1363. end;
  1364. end;
  1365. fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
  1366. Pointer(fpc_unicodestr_Copy):=ResultAddress;
  1367. end;
  1368. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  1369. var
  1370. i,MaxLen : SizeInt;
  1371. pc : punicodechar;
  1372. begin
  1373. Pos:=0;
  1374. if Length(SubStr)>0 then
  1375. begin
  1376. MaxLen:=Length(source)-Length(SubStr);
  1377. i:=0;
  1378. pc:=@source[1];
  1379. while (i<=MaxLen) do
  1380. begin
  1381. inc(i);
  1382. if (SubStr[1]=pc^) and
  1383. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  1384. begin
  1385. Pos:=i;
  1386. exit;
  1387. end;
  1388. inc(pc);
  1389. end;
  1390. end;
  1391. end;
  1392. { Faster version for a unicodechar alone }
  1393. Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  1394. var
  1395. i: SizeInt;
  1396. pc : punicodechar;
  1397. begin
  1398. pc:=@s[1];
  1399. for i:=1 to length(s) do
  1400. begin
  1401. if pc^=c then
  1402. begin
  1403. pos:=i;
  1404. exit;
  1405. end;
  1406. inc(pc);
  1407. end;
  1408. pos:=0;
  1409. end;
  1410. Function Pos (c : AnsiString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1411. begin
  1412. result:=Pos(UnicodeString(c),s);
  1413. end;
  1414. Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1415. begin
  1416. result:=Pos(UnicodeString(c),s);
  1417. end;
  1418. Function Pos (c : UnicodeString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1419. begin
  1420. result:=Pos(c,UnicodeString(s));
  1421. end;
  1422. { Faster version for a char alone. Must be implemented because }
  1423. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1424. { using pos(char,pchar) will always call the shortstring version }
  1425. { (exact match for first argument), also with $h+ (JM) }
  1426. Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  1427. var
  1428. i: SizeInt;
  1429. wc : unicodechar;
  1430. pc : punicodechar;
  1431. begin
  1432. wc:=c;
  1433. pc:=@s[1];
  1434. for i:=1 to length(s) do
  1435. begin
  1436. if pc^=wc then
  1437. begin
  1438. pos:=i;
  1439. exit;
  1440. end;
  1441. inc(pc);
  1442. end;
  1443. pos:=0;
  1444. end;
  1445. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  1446. Var
  1447. LS : SizeInt;
  1448. begin
  1449. LS:=Length(S);
  1450. if (Index>LS) or (Index<=0) or (Size<=0) then
  1451. exit;
  1452. UniqueString (S);
  1453. { (Size+Index) will overflow if Size=MaxInt. }
  1454. if Size>LS-Index then
  1455. Size:=LS-Index+1;
  1456. if Size<=LS-Index then
  1457. begin
  1458. Dec(Index);
  1459. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  1460. end;
  1461. Setlength(s,LS-Size);
  1462. end;
  1463. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  1464. var
  1465. Temp : UnicodeString;
  1466. LS : SizeInt;
  1467. begin
  1468. If Length(Source)=0 then
  1469. exit;
  1470. if index <= 0 then
  1471. index := 1;
  1472. Ls:=Length(S);
  1473. if index > LS then
  1474. index := LS+1;
  1475. Dec(Index);
  1476. Pointer(Temp) := NewUnicodeString(Length(Source)+LS);
  1477. SetLength(Temp,Length(Source)+LS);
  1478. If Index>0 then
  1479. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  1480. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  1481. If (LS-Index)>0 then
  1482. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  1483. S:=Temp;
  1484. end;
  1485. Function UpCase(c:UnicodeChar):UnicodeChar;
  1486. var
  1487. s : UnicodeString;
  1488. begin
  1489. s:=c;
  1490. result:=widestringmanager.UpperUnicodeStringProc(s)[1];
  1491. end;
  1492. function UpCase(const s : UnicodeString) : UnicodeString;
  1493. begin
  1494. result:=widestringmanager.UpperUnicodeStringProc(s);
  1495. end;
  1496. Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  1497. begin
  1498. SetLength(S,Len);
  1499. If (Buf<>Nil) and (Len>0) then
  1500. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  1501. end;
  1502. Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  1503. var
  1504. BufLen: SizeInt;
  1505. begin
  1506. SetLength(S,Len);
  1507. If (Buf<>Nil) and (Len>0) then
  1508. begin
  1509. BufLen := IndexByte(Buf^, Len+1, 0);
  1510. If (BufLen>0) and (BufLen < Len) then
  1511. Len := BufLen;
  1512. widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  1513. //PUnicodeChar(Pointer(S)+Len*sizeof(UnicodeChar))^:=#0;
  1514. end;
  1515. end;
  1516. {$ifndef FPUNONE}
  1517. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  1518. Var
  1519. SS : String;
  1520. begin
  1521. fpc_Val_Real_UnicodeStr := 0;
  1522. if length(S) > 255 then
  1523. code := 256
  1524. else
  1525. begin
  1526. SS := S;
  1527. Val(SS,fpc_Val_Real_UnicodeStr,code);
  1528. end;
  1529. end;
  1530. {$endif}
  1531. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  1532. var ss:shortstring;
  1533. begin
  1534. if length(s)>255 then
  1535. code:=256
  1536. else
  1537. begin
  1538. ss:=s;
  1539. val(ss,fpc_val_enum_unicodestr,code);
  1540. end;
  1541. end;
  1542. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  1543. Var
  1544. SS : String;
  1545. begin
  1546. if length(S) > 255 then
  1547. begin
  1548. fpc_Val_Currency_UnicodeStr:=0;
  1549. code := 256;
  1550. end
  1551. else
  1552. begin
  1553. SS := S;
  1554. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  1555. end;
  1556. end;
  1557. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  1558. Var
  1559. SS : ShortString;
  1560. begin
  1561. fpc_Val_UInt_UnicodeStr := 0;
  1562. if length(S) > 255 then
  1563. code := 256
  1564. else
  1565. begin
  1566. SS := S;
  1567. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  1568. end;
  1569. end;
  1570. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  1571. Var
  1572. SS : ShortString;
  1573. begin
  1574. fpc_Val_SInt_UnicodeStr:=0;
  1575. if length(S)>255 then
  1576. code:=256
  1577. else
  1578. begin
  1579. SS := S;
  1580. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  1581. end;
  1582. end;
  1583. {$ifndef CPU64}
  1584. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1585. Var
  1586. SS : ShortString;
  1587. begin
  1588. fpc_Val_qword_UnicodeStr:=0;
  1589. if length(S)>255 then
  1590. code:=256
  1591. else
  1592. begin
  1593. SS := S;
  1594. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1595. end;
  1596. end;
  1597. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1598. Var
  1599. SS : ShortString;
  1600. begin
  1601. fpc_Val_int64_UnicodeStr:=0;
  1602. if length(S)>255 then
  1603. code:=256
  1604. else
  1605. begin
  1606. SS := S;
  1607. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1608. end;
  1609. end;
  1610. {$endif CPU64}
  1611. {$ifndef FPUNONE}
  1612. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1613. var
  1614. ss : shortstring;
  1615. begin
  1616. str_real(len,fr,d,treal_type(rt),ss);
  1617. s:=ss;
  1618. end;
  1619. {$endif}
  1620. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1621. var ss:shortstring;
  1622. begin
  1623. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1624. s:=ss;
  1625. end;
  1626. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1627. var ss:shortstring;
  1628. begin
  1629. fpc_shortstr_bool(b,len,ss);
  1630. s:=ss;
  1631. end;
  1632. {$ifdef FPC_HAS_STR_CURRENCY}
  1633. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1634. var
  1635. ss : shortstring;
  1636. begin
  1637. str(c:len:fr,ss);
  1638. s:=ss;
  1639. end;
  1640. {$endif FPC_HAS_STR_CURRENCY}
  1641. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1642. Var
  1643. SS : ShortString;
  1644. begin
  1645. Str (v:Len,SS);
  1646. S:=SS;
  1647. end;
  1648. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1649. Var
  1650. SS : ShortString;
  1651. begin
  1652. str(v:Len,SS);
  1653. S:=SS;
  1654. end;
  1655. {$ifndef CPU64}
  1656. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1657. Var
  1658. SS : ShortString;
  1659. begin
  1660. Str (v:Len,SS);
  1661. S:=SS;
  1662. end;
  1663. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1664. Var
  1665. SS : ShortString;
  1666. begin
  1667. str(v:Len,SS);
  1668. S:=SS;
  1669. end;
  1670. {$endif CPU64}
  1671. { converts an utf-16 code point or surrogate pair to utf-32 }
  1672. function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  1673. var
  1674. w: unicodechar;
  1675. begin
  1676. { UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
  1677. { are the same in UTF-32 }
  1678. w:=s[index];
  1679. if (w<=#$d7ff) or
  1680. (w>=#$e000) then
  1681. begin
  1682. result:=UCS4Char(w);
  1683. len:=1;
  1684. end
  1685. { valid surrogate pair? }
  1686. else if (w<=#$dbff) and
  1687. { w>=#$d7ff check not needed, checked above }
  1688. (index<length(s)) and
  1689. (s[index+1]>=#$dc00) and
  1690. (s[index+1]<=#$dfff) then
  1691. { convert the surrogate pair to UTF-32 }
  1692. begin
  1693. result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
  1694. len:=2;
  1695. end
  1696. else
  1697. { invalid surrogate -> do nothing }
  1698. begin
  1699. result:=UCS4Char(w);
  1700. len:=1;
  1701. end;
  1702. end;
  1703. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1704. begin
  1705. if assigned(Source) then
  1706. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  1707. else
  1708. Result:=0;
  1709. end;
  1710. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1711. var
  1712. i,j : SizeUInt;
  1713. w : word;
  1714. lw : longword;
  1715. len : longint;
  1716. begin
  1717. result:=0;
  1718. if source=nil then
  1719. exit;
  1720. i:=0;
  1721. j:=0;
  1722. if assigned(Dest) then
  1723. begin
  1724. while (i<SourceChars) and (j<MaxDestBytes) do
  1725. begin
  1726. w:=word(Source[i]);
  1727. case w of
  1728. 0..$7f:
  1729. begin
  1730. Dest[j]:=char(w);
  1731. inc(j);
  1732. end;
  1733. $80..$7ff:
  1734. begin
  1735. if j+1>=MaxDestBytes then
  1736. break;
  1737. Dest[j]:=char($c0 or (w shr 6));
  1738. Dest[j+1]:=char($80 or (w and $3f));
  1739. inc(j,2);
  1740. end;
  1741. $800..$d7ff,$e000..$ffff:
  1742. begin
  1743. if j+2>=MaxDestBytes then
  1744. break;
  1745. Dest[j]:=char($e0 or (w shr 12));
  1746. Dest[j+1]:=char($80 or ((w shr 6) and $3f));
  1747. Dest[j+2]:=char($80 or (w and $3f));
  1748. inc(j,3);
  1749. end;
  1750. $d800..$dbff:
  1751. {High Surrogates}
  1752. begin
  1753. if j+3>=MaxDestBytes then
  1754. break;
  1755. if (i<sourcechars-1) and
  1756. (word(Source[i+1]) >= $dc00) and
  1757. (word(Source[i+1]) <= $dfff) then
  1758. begin
  1759. lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
  1760. Dest[j]:=char($f0 or (lw shr 18));
  1761. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1762. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1763. Dest[j+3]:=char($80 or (lw and $3f));
  1764. inc(j,4);
  1765. inc(i);
  1766. end;
  1767. end;
  1768. end;
  1769. inc(i);
  1770. end;
  1771. if j>SizeUInt(MaxDestBytes-1) then
  1772. j:=MaxDestBytes-1;
  1773. Dest[j]:=#0;
  1774. end
  1775. else
  1776. begin
  1777. while i<SourceChars do
  1778. begin
  1779. case word(Source[i]) of
  1780. $0..$7f:
  1781. inc(j);
  1782. $80..$7ff:
  1783. inc(j,2);
  1784. $800..$d7ff,$e000..$ffff:
  1785. inc(j,3);
  1786. $d800..$dbff:
  1787. begin
  1788. if (i<sourcechars-1) and
  1789. (word(Source[i+1]) >= $dc00) and
  1790. (word(Source[i+1]) <= $dfff) then
  1791. begin
  1792. inc(j,4);
  1793. inc(i);
  1794. end;
  1795. end;
  1796. end;
  1797. inc(i);
  1798. end;
  1799. end;
  1800. result:=j+1;
  1801. end;
  1802. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1803. begin
  1804. if assigned(Source) then
  1805. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1806. else
  1807. Result:=0;
  1808. end;
  1809. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1810. const
  1811. UNICODE_INVALID=63;
  1812. var
  1813. InputUTF8: SizeUInt;
  1814. IBYTE: BYTE;
  1815. OutputUnicode: SizeUInt;
  1816. PRECHAR: SizeUInt;
  1817. TempBYTE: BYTE;
  1818. CharLen: SizeUint;
  1819. LookAhead: SizeUInt;
  1820. UC: SizeUInt;
  1821. begin
  1822. if not assigned(Source) then
  1823. begin
  1824. result:=0;
  1825. exit;
  1826. end;
  1827. result:=SizeUInt(-1);
  1828. InputUTF8:=0;
  1829. OutputUnicode:=0;
  1830. PreChar:=0;
  1831. if Assigned(Dest) Then
  1832. begin
  1833. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1834. begin
  1835. IBYTE:=byte(Source[InputUTF8]);
  1836. if (IBYTE and $80) = 0 then
  1837. begin
  1838. //One character US-ASCII, convert it to unicode
  1839. if IBYTE = 10 then
  1840. begin
  1841. If (PreChar<>13) and FALSE then
  1842. begin
  1843. //Expand to crlf, conform UTF-8.
  1844. //This procedure will break the memory alocation by
  1845. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1846. if OutputUnicode+1<MaxDestChars then
  1847. begin
  1848. Dest[OutputUnicode]:=WideChar(13);
  1849. inc(OutputUnicode);
  1850. Dest[OutputUnicode]:=WideChar(10);
  1851. inc(OutputUnicode);
  1852. PreChar:=10;
  1853. end
  1854. else
  1855. begin
  1856. Dest[OutputUnicode]:=WideChar(13);
  1857. inc(OutputUnicode);
  1858. end;
  1859. end
  1860. else
  1861. begin
  1862. Dest[OutputUnicode]:=WideChar(IBYTE);
  1863. inc(OutputUnicode);
  1864. PreChar:=IBYTE;
  1865. end;
  1866. end
  1867. else
  1868. begin
  1869. Dest[OutputUnicode]:=WideChar(IBYTE);
  1870. inc(OutputUnicode);
  1871. PreChar:=IBYTE;
  1872. end;
  1873. inc(InputUTF8);
  1874. end
  1875. else
  1876. begin
  1877. TempByte:=IBYTE;
  1878. CharLen:=0;
  1879. while (TempBYTE and $80)<>0 do
  1880. begin
  1881. TempBYTE:=(TempBYTE shl 1) and $FE;
  1882. inc(CharLen);
  1883. end;
  1884. //Test for the "CharLen" conforms UTF-8 string
  1885. //This means the 10xxxxxx pattern.
  1886. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1887. begin
  1888. //Insuficient chars in string to decode
  1889. //UTF-8 array. Fallback to single char.
  1890. CharLen:= 1;
  1891. end;
  1892. for LookAhead := 1 to CharLen-1 do
  1893. begin
  1894. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1895. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1896. begin
  1897. //Invalid UTF-8 sequence, fallback.
  1898. CharLen:= LookAhead;
  1899. break;
  1900. end;
  1901. end;
  1902. UC:=$FFFF;
  1903. case CharLen of
  1904. 1: begin
  1905. //Not valid UTF-8 sequence
  1906. UC:=UNICODE_INVALID;
  1907. end;
  1908. 2: begin
  1909. //Two bytes UTF, convert it
  1910. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1911. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1912. if UC <= $7F then
  1913. begin
  1914. //Invalid UTF sequence.
  1915. UC:=UNICODE_INVALID;
  1916. end;
  1917. end;
  1918. 3: begin
  1919. //Three bytes, convert it to unicode
  1920. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1921. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1922. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1923. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1924. begin
  1925. //Invalid UTF-8 sequence
  1926. UC:= UNICODE_INVALID;
  1927. End;
  1928. end;
  1929. 4: begin
  1930. //Four bytes, convert it to two unicode characters
  1931. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1932. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1933. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1934. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1935. if (UC < $10000) or (UC > $10FFFF) then
  1936. begin
  1937. UC:= UNICODE_INVALID;
  1938. end
  1939. else
  1940. begin
  1941. { only store pair if room }
  1942. dec(UC,$10000);
  1943. if (OutputUnicode<MaxDestChars-1) then
  1944. begin
  1945. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1946. inc(OutputUnicode);
  1947. UC:=(UC and $3ff) + $DC00;
  1948. end
  1949. else
  1950. begin
  1951. InputUTF8:= InputUTF8 + CharLen;
  1952. { don't store anything }
  1953. CharLen:=0;
  1954. end;
  1955. end;
  1956. end;
  1957. 5,6,7: begin
  1958. //Invalid UTF8 to unicode conversion,
  1959. //mask it as invalid UNICODE too.
  1960. UC:=UNICODE_INVALID;
  1961. end;
  1962. end;
  1963. if CharLen > 0 then
  1964. begin
  1965. PreChar:=UC;
  1966. Dest[OutputUnicode]:=WideChar(UC);
  1967. inc(OutputUnicode);
  1968. end;
  1969. InputUTF8:= InputUTF8 + CharLen;
  1970. end;
  1971. end;
  1972. Result:=OutputUnicode+1;
  1973. end
  1974. else
  1975. begin
  1976. while (InputUTF8<SourceBytes) do
  1977. begin
  1978. IBYTE:=byte(Source[InputUTF8]);
  1979. if (IBYTE and $80) = 0 then
  1980. begin
  1981. //One character US-ASCII, convert it to unicode
  1982. if IBYTE = 10 then
  1983. begin
  1984. if (PreChar<>13) and FALSE then
  1985. begin
  1986. //Expand to crlf, conform UTF-8.
  1987. //This procedure will break the memory alocation by
  1988. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1989. inc(OutputUnicode,2);
  1990. PreChar:=10;
  1991. end
  1992. else
  1993. begin
  1994. inc(OutputUnicode);
  1995. PreChar:=IBYTE;
  1996. end;
  1997. end
  1998. else
  1999. begin
  2000. inc(OutputUnicode);
  2001. PreChar:=IBYTE;
  2002. end;
  2003. inc(InputUTF8);
  2004. end
  2005. else
  2006. begin
  2007. TempByte:=IBYTE;
  2008. CharLen:=0;
  2009. while (TempBYTE and $80)<>0 do
  2010. begin
  2011. TempBYTE:=(TempBYTE shl 1) and $FE;
  2012. inc(CharLen);
  2013. end;
  2014. //Test for the "CharLen" conforms UTF-8 string
  2015. //This means the 10xxxxxx pattern.
  2016. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  2017. begin
  2018. //Insuficient chars in string to decode
  2019. //UTF-8 array. Fallback to single char.
  2020. CharLen:= 1;
  2021. end;
  2022. for LookAhead := 1 to CharLen-1 do
  2023. begin
  2024. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  2025. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  2026. begin
  2027. //Invalid UTF-8 sequence, fallback.
  2028. CharLen:= LookAhead;
  2029. break;
  2030. end;
  2031. end;
  2032. UC:=$FFFF;
  2033. case CharLen of
  2034. 1: begin
  2035. //Not valid UTF-8 sequence
  2036. UC:=UNICODE_INVALID;
  2037. end;
  2038. 2: begin
  2039. //Two bytes UTF, convert it
  2040. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  2041. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  2042. if UC <= $7F then
  2043. begin
  2044. //Invalid UTF sequence.
  2045. UC:=UNICODE_INVALID;
  2046. end;
  2047. end;
  2048. 3: begin
  2049. //Three bytes, convert it to unicode
  2050. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  2051. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  2052. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  2053. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  2054. begin
  2055. //Invalid UTF-8 sequence
  2056. UC:= UNICODE_INVALID;
  2057. end;
  2058. end;
  2059. 4: begin
  2060. //Four bytes, convert it to two unicode characters
  2061. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  2062. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  2063. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  2064. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  2065. if (UC < $10000) or (UC > $10FFFF) then
  2066. UC:= UNICODE_INVALID
  2067. else
  2068. { extra character character }
  2069. inc(OutputUnicode);
  2070. end;
  2071. 5,6,7: begin
  2072. //Invalid UTF8 to unicode conversion,
  2073. //mask it as invalid UNICODE too.
  2074. UC:=UNICODE_INVALID;
  2075. end;
  2076. end;
  2077. if CharLen > 0 then
  2078. begin
  2079. PreChar:=UC;
  2080. inc(OutputUnicode);
  2081. end;
  2082. InputUTF8:= InputUTF8 + CharLen;
  2083. end;
  2084. end;
  2085. Result:=OutputUnicode+1;
  2086. end;
  2087. end;
  2088. function UTF8Encode(const s : Ansistring) : UTF8String; inline;
  2089. begin
  2090. Result:=UTF8Encode(UnicodeString(s));
  2091. end;
  2092. function UTF8Encode(const s : UnicodeString) : UTF8String;
  2093. var
  2094. i : SizeInt;
  2095. hs : UTF8String;
  2096. begin
  2097. result:='';
  2098. if s='' then
  2099. exit;
  2100. SetLength(hs,length(s)*3);
  2101. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  2102. if i>0 then
  2103. begin
  2104. SetLength(hs,i-1);
  2105. result:=hs;
  2106. end;
  2107. end;
  2108. function UTF8Decode(const s : UTF8String): UnicodeString;
  2109. var
  2110. i : SizeInt;
  2111. hs : UnicodeString;
  2112. begin
  2113. result:='';
  2114. if s='' then
  2115. exit;
  2116. SetLength(hs,length(s));
  2117. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  2118. if i>0 then
  2119. begin
  2120. SetLength(hs,i-1);
  2121. result:=hs;
  2122. end;
  2123. end;
  2124. function AnsiToUtf8(const s : RawByteString): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
  2125. begin
  2126. Result:=Utf8Encode(s);
  2127. end;
  2128. function Utf8ToAnsi(const s : UTF8String) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  2129. begin
  2130. Result:=Utf8Decode(s);
  2131. end;
  2132. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  2133. var
  2134. i, slen,
  2135. destindex : SizeInt;
  2136. len : longint;
  2137. begin
  2138. slen:=length(s);
  2139. setlength(result,slen+1);
  2140. i:=1;
  2141. destindex:=0;
  2142. while (i<=slen) do
  2143. begin
  2144. result[destindex]:=utf16toutf32(s,i,len);
  2145. inc(destindex);
  2146. inc(i,len);
  2147. end;
  2148. { destindex <= slen (surrogate pairs may have been merged) }
  2149. { destindex+1 for terminating #0 (dynamic arrays are }
  2150. { implicitely filled with zero) }
  2151. setlength(result,destindex+1);
  2152. end;
  2153. { concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. }
  2154. procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
  2155. var
  2156. p : PUnicodeChar;
  2157. begin
  2158. { if nc > $ffff, we need two places }
  2159. if (index+ord(nc > $ffff)>length(s)) then
  2160. if (length(s) < 10*256) then
  2161. setlength(s,length(s)+10)
  2162. else
  2163. setlength(s,length(s)+length(s) shr 8);
  2164. { we know that s is unique -> avoid uniquestring calls}
  2165. p:=@s[index];
  2166. if (nc<$ffff) then
  2167. begin
  2168. p^:=unicodechar(nc);
  2169. inc(index);
  2170. end
  2171. else if (dword(nc)<=$10ffff) then
  2172. begin
  2173. p^:=unicodechar((nc - $10000) shr 10 + $d800);
  2174. (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
  2175. inc(index,2);
  2176. end
  2177. else
  2178. { invalid code point }
  2179. begin
  2180. p^:='?';
  2181. inc(index);
  2182. end;
  2183. end;
  2184. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  2185. var
  2186. i : SizeInt;
  2187. resindex : SizeInt;
  2188. begin
  2189. { skip terminating #0 }
  2190. SetLength(result,length(s)-1);
  2191. resindex:=1;
  2192. for i:=0 to high(s)-1 do
  2193. ConcatUTF32ToUnicodeStr(s[i],result,resindex);
  2194. { adjust result length (may be too big due to growing }
  2195. { for surrogate pairs) }
  2196. setlength(result,resindex-1);
  2197. end;
  2198. function WideStringToUCS4String(const s : WideString) : UCS4String;
  2199. var
  2200. i, slen,
  2201. destindex : SizeInt;
  2202. len : longint;
  2203. begin
  2204. slen:=length(s);
  2205. setlength(result,slen+1);
  2206. i:=1;
  2207. destindex:=0;
  2208. while (i<=slen) do
  2209. begin
  2210. result[destindex]:=utf16toutf32(s,i,len);
  2211. inc(destindex);
  2212. inc(i,len);
  2213. end;
  2214. { destindex <= slen (surrogate pairs may have been merged) }
  2215. { destindex+1 for terminating #0 (dynamic arrays are }
  2216. { implicitely filled with zero) }
  2217. setlength(result,destindex+1);
  2218. end;
  2219. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  2220. procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
  2221. var
  2222. p : PWideChar;
  2223. begin
  2224. { if nc > $ffff, we need two places }
  2225. if (index+ord(nc > $ffff)>length(s)) then
  2226. if (length(s) < 10*256) then
  2227. setlength(s,length(s)+10)
  2228. else
  2229. setlength(s,length(s)+length(s) shr 8);
  2230. { we know that s is unique -> avoid uniquestring calls}
  2231. p:=@s[index];
  2232. if (nc<$ffff) then
  2233. begin
  2234. p^:=widechar(nc);
  2235. inc(index);
  2236. end
  2237. else if (dword(nc)<=$10ffff) then
  2238. begin
  2239. p^:=widechar((nc - $10000) shr 10 + $d800);
  2240. (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
  2241. inc(index,2);
  2242. end
  2243. else
  2244. { invalid code point }
  2245. begin
  2246. p^:='?';
  2247. inc(index);
  2248. end;
  2249. end;
  2250. function UCS4StringToWideString(const s : UCS4String) : WideString;
  2251. var
  2252. i : SizeInt;
  2253. resindex : SizeInt;
  2254. begin
  2255. { skip terminating #0 }
  2256. SetLength(result,length(s)-1);
  2257. resindex:=1;
  2258. for i:=0 to high(s)-1 do
  2259. ConcatUTF32ToWideStr(s[i],result,resindex);
  2260. { adjust result length (may be too big due to growing }
  2261. { for surrogate pairs) }
  2262. setlength(result,resindex-1);
  2263. end;
  2264. const
  2265. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  2266. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  2267. procedure unimplementedunicodestring;
  2268. begin
  2269. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  2270. If IsConsole then
  2271. begin
  2272. Writeln(StdErr,SNoUnicodestrings);
  2273. Writeln(StdErr,SRecompileWithUnicodestrings);
  2274. end;
  2275. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  2276. HandleErrorFrame(233,get_frame);
  2277. end;
  2278. function StringElementSize(const S: UnicodeString): Word; overload;
  2279. begin
  2280. if assigned(Pointer(S)) then
  2281. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  2282. else
  2283. Result:=SizeOf(UnicodeChar);
  2284. end;
  2285. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  2286. begin
  2287. if assigned(Pointer(S)) then
  2288. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  2289. else
  2290. Result:=SizeOf(UnicodeChar);
  2291. end;
  2292. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  2293. begin
  2294. if assigned(Pointer(S)) then
  2295. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  2296. else
  2297. Result:=SizeOf(UnicodeChar);
  2298. end;
  2299. {$warnings off}
  2300. function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
  2301. begin
  2302. unimplementedunicodestring;
  2303. end;
  2304. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  2305. begin
  2306. unimplementedunicodestring;
  2307. end;
  2308. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  2309. begin
  2310. unimplementedunicodestring;
  2311. end;
  2312. {$warnings on}
  2313. procedure initunicodestringmanager;
  2314. begin
  2315. {$ifndef HAS_WIDESTRINGMANAGER}
  2316. widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  2317. widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  2318. widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
  2319. widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  2320. {$endif HAS_WIDESTRINGMANAGER}
  2321. widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
  2322. widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  2323. {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2324. {$ifndef HAS_WIDESTRINGMANAGER}
  2325. widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
  2326. widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  2327. widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
  2328. widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  2329. {$endif HAS_WIDESTRINGMANAGER}
  2330. widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
  2331. widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
  2332. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  2333. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  2334. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  2335. end;