ustrings.inc 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of chars. Multiply with
  20. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  21. @ : String + Terminating #0;
  22. Punicodechar(Unicodestring) is a valid typecast.
  23. So WS[i] is converted to the address @WS+i-1.
  24. Constants should be assigned a reference count of -1
  25. Meaning that they can't be disposed of.
  26. }
  type
    PUnicodeRec = ^TUnicodeRec;

    { Header record that ends with the first character of the string.
      A UnicodeString pointer points at the First field; the remaining
      fields sit in front of it. The record is packed, so the field
      order below defines the memory layout. }
    TUnicodeRec = packed record
      CodePage    : TSystemCodePage;
      ElementSize : Word;
  {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
  {$endif CPU64}
      Ref         : SizeInt;      { reference count; negative for constant strings }
      Len         : SizeInt;      { length in UnicodeChars, not bytes }
      First       : UnicodeChar;  { first character of the string data }
    end;

  const
    { Size of the complete header, including the First character. }
    UnicodeRecLen   = SizeOf(TUnicodeRec);
    { Byte distance from the start of the header to the First field. }
    UnicodeFirstOff = UnicodeRecLen-SizeOf(UnicodeChar);
  43. {
  44. Default UnicodeChar <-> Char conversion only converts code units
  45. below 256 (one byte each); all others are translated to '?'.
  46. These routines can be overridden for the current locale.
  47. }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UnicodeChar -> Char conversion.
    dest is resized to len bytes; every source code unit below 256 is
    stored as a single byte, anything else becomes '?'.
    The cp argument is not consulted by this implementation.
  }
  var
    remaining : SizeInt;
    outp : PAnsiChar;
  begin
    SetLength(dest,len);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        if word(source^)>=256 then
          outp^:='?'
        else
          outp^:=char(word(source^));
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback Char -> UnicodeChar conversion.
    dest is resized to len characters and every source byte is widened
    to the UnicodeChar with the same ordinal value.
    The cp argument is not consulted by this implementation.
  }
  var
    remaining : SizeInt;
    outp : PUnicodeChar;
  begin
    SetLength(dest,len);
    { SetLength guarantees that dest is unique }
    outp:=pointer(dest);
    remaining:=len;
    while remaining>0 do
      begin
        outp^:=unicodechar(byte(source^));
        inc(outp);
        inc(source);
        dec(remaining);
      end;
  end;
  function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  { Default implementation: returns Length(Str). }
  begin
    Result:=Length(Str);
  end;
  function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  {
    Default implementation: returns 1 unless Str points at a #0, in which
    case it returns 0. MaxLookAead is not used.
  }
  begin
    Result:=0;
    if Str[0]<>#0 then
      Result:=1;
  end;
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  {
    Default implementation: returns DefaultSystemCodePage regardless
    of stdcp.
  }
  begin
    { don't raise an exception here. We need this for text file handling }
    DefaultGetStandardCodePage:=DefaultSystemCodePage;
  end;
  Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  {
    Installs New as the active string manager and hands the previously
    installed one back in Old.
  }
  begin
    { save the current manager first, then replace it }
    GetUnicodeStringManager(Old);
    widestringmanager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  { Installs New as the active string manager; the old one is discarded. }
  begin
    widestringmanager:=New;
  end;
  Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  {
    Installs New as the active string manager and hands the previously
    installed one back in Old.
  }
  begin
    { save the current manager first, then replace it }
    GetWideStringManager(Old);
    widestringmanager:=New;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  { Installs New as the active string manager; the old one is discarded. }
  begin
    widestringmanager:=New;
  end;
  121. {****************************************************************************
  122. Internal functions, not in interface.
  123. ****************************************************************************}
  procedure UnicodeStringError;
  {
    Reports run-time error 204 through HandleErrorFrame, passing the
    frame of this routine.
  }
  begin
    HandleErrorFrame(204,get_frame);
  end;
  {$ifdef UnicodeStrDebug}
  Procedure DumpUnicodeRec(S : Pointer);
  {
    Debug helper: prints the length and reference count stored in the
    header of the string S points to, or a note when S is nil.
  }
  var
    hdr : PUnicodeRec;
  begin
    if S=nil then
      begin
        Writeln ('String is nil');
        exit;
      end;
    { step back from the character data to the start of the header }
    hdr:=PUnicodeRec(S-UnicodeFirstOff);
    Write ('(Len:',hdr^.Len);
    Writeln (' Ref: ',hdr^.Ref,')');
  end;
  {$endif}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString on the heap with room for Len
    characters plus the header. The header is initialised with length
    Len, reference count 1, the default unicode code page and the
    element size; the first character is set to #0.
    Returns a pointer to the character data. If GetMem yields nil,
    UnicodeStringError is called and nil is returned.
  }
  var
    mem : Pointer;
    hdr : PUnicodeRec;
  begin
    GetMem(mem,Len*sizeof(UnicodeChar)+UnicodeRecLen);
    if mem=nil then
      begin
        UnicodeStringError;
        NewUnicodeString:=nil;
        exit;
      end;
    hdr:=PUnicodeRec(mem);
    hdr^.Len:=Len;                              { initial length }
    hdr^.Ref:=1;                                { initial refcount }
    hdr^.CodePage:=DefaultUnicodeCodePage;
    hdr^.ElementSize:=SizeOf(UnicodeChar);
    hdr^.First:=#0;                             { terminating #0 }
    { the caller gets a pointer to the character data, not the header }
    NewUnicodeString:=mem+UnicodeFirstOff;
  end;
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Drops one reference from the unicodestring S points to.
    Nil strings and strings with a negative reference count (constants)
    are left untouched. When the count reaches zero the memory is
    released and S is set to nil.
  }
  var
    hdr : PUnicodeRec;
  begin
    if S<>nil then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { a negative count marks a constant string }
        if hdr^.Ref>=0 then
          begin
            { declocked does a MT safe dec and returns true, if the counter is 0 }
            if declocked(hdr^.Ref) then
              begin
                FreeMem(hdr);
                S:=nil;
              end;
          end;
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S points to.
    Nil strings and strings with a negative reference count (constants)
    are left untouched.
  }
  var
    hdr : PUnicodeRec;
  begin
    if S<>nil then
      begin
        hdr:=PUnicodeRec(S-UnicodeFirstOff);
        { constant strings keep their negative count }
        if hdr^.Ref>=0 then
          inclocked(hdr^.Ref);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage. At most high(res)
    source characters are handed to the conversion.
  }
  var
    charcount : SizeInt;
    converted : ansistring;
  begin
    res:='';
    charcount:=Length(S2);
    if charcount<=0 then
      exit;
    { never convert more characters than res can hold }
    if charcount>high(res) then
      charcount:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),converted,DefaultSystemCodePage,charcount);
    res:=converted;
  end;
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    An empty S2 yields an empty result.

    No terminating #0 is written by hand here. The move procedure sizes
    the result itself, and the converted string can hold fewer
    characters than S2 has bytes (for example with a multi-byte system
    code page). Writing a terminator at offset Length(S2) could then
    land outside the allocated string.
  }
  var
    bytecount : SizeInt;
  begin
    Result:='';
    bytecount:=Length(S2);
    if bytecount>0 then
      widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,Result,bytecount);
  end;
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString in code page cp using the
    installed Unicode2AnsiMoveProc. CP_ACP is replaced by
    DefaultSystemCodePage. Without FPC_HAS_CPSTRING the code page is
    always DefaultSystemCodePage.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    if Length(S2)<=0 then
      exit;
    if cp=CP_ACP then
      cp:=DefaultSystemCodePage;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),Result,cp,Length(S2));
  end;
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString using the installed
    Ansi2UnicodeMoveProc. The source code page is taken from
    StringCodePage(S2); CP_ACP is replaced by DefaultSystemCodePage.
  }
  var
    bytecount : SizeInt;
    srccp : TSystemCodePage;
  begin
    Result:='';
    bytecount:=Length(S2);
    if bytecount<=0 then
      exit;
    srccp:=StringCodePage(S2);
    if srccp=CP_ACP then
      srccp:=DefaultSystemCodePage;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),srccp,Result,bytecount);
  end;
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of a UnicodeString into a new WideString. }
  var
    charcount : SizeInt;
  begin
    charcount:=Length(S2);
    SetLength(Result,charcount);
    Move(pointer(S2)^,Pointer(Result)^,charcount*sizeof(WideChar));
  end;
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of a WideString into a new UnicodeString. }
  var
    charcount : SizeInt;
  begin
    charcount:=Length(S2);
    SetLength(Result,charcount);
    Move(pointer(S2)^,Pointer(Result)^,charcount*sizeof(WideChar));
  end;
  Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a #0-terminated PUnicodeChar to an AnsiString in code page
    cp using the installed Unicode2AnsiMoveProc.
    A nil pointer or an empty string yields ''.
    CP_ACP is replaced by DefaultSystemCodePage, matching
    fpc_UnicodeStr_To_AnsiStr and fpc_UChar_To_AnsiStr.
    Without FPC_HAS_CPSTRING the code page is always
    DefaultSystemCodePage.
  }
  var
    charcount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    if p=nil then
      exit;
    { position of the terminating #0 = number of characters }
    charcount:=IndexWord(p^, -1, 0);
    if charcount>0 then
      begin
        if cp=CP_ACP then
          cp:=DefaultSystemCodePage;
        widestringmanager.Unicode2AnsiMoveProc(p,Result,cp,charcount);
      end;
  end;
  Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  {
    Copies a #0-terminated PUnicodeChar into a new UnicodeString.
    A nil pointer yields ''.
  }
  var
    charcount : SizeInt;
  begin
    Result:='';
    if p<>nil then
      begin
        { position of the terminating #0 = number of characters }
        charcount:=IndexWord(p^, -1, 0);
        SetLength(Result,charcount);
        if charcount>0 then
          Move(p^,PUnicodeChar(Pointer(Result))^,charcount*sizeof(UnicodeChar));
      end;
  end;
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Copies a #0-terminated PWideChar into a new UnicodeString.
    A nil pointer yields ''.
  }
  var
    charcount : SizeInt;
  begin
    Result:='';
    if p<>nil then
      begin
        { position of the terminating #0 = number of characters }
        charcount:=IndexWord(p^, -1, 0);
        SetLength(Result,charcount);
        if charcount>0 then
          Move(p^,PUnicodeChar(Pointer(Result))^,charcount*sizeof(UnicodeChar));
      end;
  end;
  procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  {
    Converts a #0-terminated PUnicodeChar to a ShortString using the
    installed Unicode2AnsiMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty string yields ''.
  }
  var
    charcount : SizeInt;
    converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        charcount:=IndexWord(p^, high(PtrInt), 0);
        if charcount>0 then
          begin
            widestringmanager.Unicode2AnsiMoveProc(p,converted,DefaultSystemCodePage,charcount);
            res:=converted;
          end;
      end;
  end;
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts a #0-terminated PWideChar to an AnsiString in code page cp
    using the installed Wide2AnsiMoveProc.
    A nil pointer or an empty string yields ''.
    CP_ACP is replaced by DefaultSystemCodePage, matching
    fpc_UnicodeStr_To_AnsiStr and fpc_UChar_To_AnsiStr.
    Without FPC_HAS_CPSTRING the code page is always
    DefaultSystemCodePage.
  }
  var
    charcount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Result:='';
    if p=nil then
      exit;
    { position of the terminating #0 = number of characters }
    charcount:=IndexWord(p^, -1, 0);
    if charcount>0 then
      begin
        if cp=CP_ACP then
          cp:=DefaultSystemCodePage;
        widestringmanager.Wide2AnsiMoveProc(p,Result,cp,charcount);
      end;
  end;
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts a #0-terminated PWideChar to a ShortString using the
    installed Wide2AnsiMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty string yields ''.
  }
  var
    charcount : SizeInt;
    converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        charcount:=IndexWord(p^, high(PtrInt), 0);
        if charcount>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,converted,DefaultSystemCodePage,charcount);
            res:=converted;
          end;
      end;
  end;
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Performs S1:=S2 on the raw string pointers.
    The new value gets its reference added before the old value of S1 is
    released, so assigning a string to itself cannot free it.
  }
  var
    src : PUnicodeRec;
  begin
    if S2<>nil then
      begin
        src:=PUnicodeRec(S2-UnicodeFirstOff);
        { only strings with a positive count are reference counted }
        if src^.Ref>0 then
          inclocked(src^.Ref);
      end;
    { release whatever S1 referred to so far }
    fpc_unicodestr_decr_ref (S1);
    S1:=S2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    DestS:=S1+S2.
    If one operand is empty the other is simply assigned. Otherwise
    DestS is resized once and the operands are copied in. The cases where
    DestS shares its data pointer with S1 and/or S2 are handled in place.
  }
  var
    leftlen,rightlen : SizeInt;
    destIsLeft,destIsRight : boolean;
    tail : pointer;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=S2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=S1;
        exit;
      end;
    leftlen:=Length(S1);
    rightlen:=Length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    destIsLeft:=Pointer(DestS)=Pointer(S1);
    destIsRight:=Pointer(DestS)=Pointer(S2);
    { a destination unrelated to both operands starts out empty }
    if not (destIsLeft or destIsRight) then
      DestS:='';
    SetLength(DestS,leftlen+rightlen);
    { position in DestS where the right operand has to end up }
    tail:=Pointer(DestS)+leftlen*sizeof(UnicodeChar);
    if destIsLeft then
      begin
        if destIsRight then
          { DestS:=DestS+DestS: duplicate the first half, terminator is already set }
          Move(Pointer(DestS)^,tail^,rightlen*sizeof(UnicodeChar))
        else
          { DestS:=DestS+S2: append S2 including its terminator }
          Move(Pointer(S2)^,tail^,(rightlen+1)*sizeof(UnicodeChar));
      end
    else if destIsRight then
      begin
        { DestS:=S1+DestS: shift the old content back, then prepend S1 }
        Move(Pointer(DestS)^,tail^,(rightlen+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,leftlen*sizeof(UnicodeChar));
      end
    else
      begin
        Move(Pointer(S1)^,Pointer(DestS)^,leftlen*sizeof(UnicodeChar));
        Move(Pointer(S2)^,tail^,(rightlen+1)*sizeof(UnicodeChar));
      end;
  end;
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    DestS:=sarr[0]+sarr[1]+...+sarr[high(sarr)].

    An empty array yields ''. A single-element array yields that
    element; the previous code returned '' for it and let an empty
    array fall through to read sarr[0].
    When DestS is the first operand the new data is appended in place.
    When DestS also occurs later in the list, an extra reference keeps
    the original data alive while DestS is rebuilt from scratch.
  }
  Var
    i : Longint;
    p,pc : pointer;
    Size,NewLen : SizeInt;
    lowstart : longint;
    destcopy : pointer;
    OldDestLen : SizeInt;
  begin
    { nothing to concatenate }
    if high(sarr)<low(sarr) then
      begin
        DestS:='';
        exit;
      end;
    { a single operand is just an assignment }
    if high(sarr)=low(sarr) then
      begin
        DestS:=sarr[low(sarr)];
        exit;
      end;
    destcopy:=nil;
    lowstart:=low(sarr);
    if Pointer(DestS)=Pointer(sarr[lowstart]) then
      inc(lowstart);
    { Check for another reuse, then we can't use
      the append optimization }
    for i:=lowstart to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[i]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. Therefore
              this optimization is disabled for WINLIKEUNICODESTRING }
            destcopy:=pointer(DestS);
            fpc_UnicodeStr_Incr_Ref(destcopy);
            lowstart:=low(sarr);
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if lowstart=low(sarr) then
      DestS:='';
    OldDestLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestS,NewLen);
    { Concat all strings, except the string we already
      copied in DestS }
    pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
    for i:=lowstart to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(unicodestring(p));
            { copy including the terminator; the next operand overwrites it }
            Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
            inc(pc,Size*sizeof(UnicodeChar));
          end;
      end;
    { drop the extra reference taken above; a no-op while destcopy is nil }
    fpc_UnicodeStr_Decr_Ref(destcopy);
  end;
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  {
    Converts a Char to a UnicodeChar: c is run through the installed
    Ansi2UnicodeMoveProc with DefaultSystemCodePage and the first
    character of the result is returned.
  }
  var
    converted : unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,converted,1);
    Result:=converted[1];
  end;
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  { Builds a UnicodeString of length 1 holding the character c. }
  begin
    SetLength(Result,1);
    Result[1]:=c;
    { Terminating Zero }
    PUnicodeChar(Pointer(Result)+sizeof(UnicodeChar))^:=#0;
  end;
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a UnicodeChar to a Char using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage. If the conversion
    does not yield exactly one byte, '?' is returned.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    Result:='?';
    if length(converted)=1 then
      Result:=converted[1];
  end;
  Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  { Builds a UnicodeString of length 1 holding the character c. }
  begin
    SetLength(fpc_WChar_To_UnicodeStr,1);
    fpc_WChar_To_UnicodeStr[1]:=c;
  end;
  Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
  {
    Converts a Char to a WideChar: c is run through the installed
    Ansi2WideMoveProc with DefaultSystemCodePage and the first
    character of the result is returned.
  }
  var
    converted : widestring;
  begin
    widestringmanager.Ansi2WideMoveProc(@c,DefaultSystemCodePage,converted,1);
    Result:=converted[1];
  end;
  Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
  {
    Converts a WideChar to a Char using the installed Wide2AnsiMoveProc
    and DefaultSystemCodePage. If the conversion does not yield exactly
    one byte, '?' is returned.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c, converted, DefaultSystemCodePage, 1);
    Result:='?';
    if length(converted)=1 then
      Result:=converted[1];
  end;
  procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  {
    Converts a WideChar to a ShortString using the installed
    Wide2AnsiMoveProc and DefaultSystemCodePage.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c,converted,DefaultSystemCodePage,1);
    res:=converted;
  end;
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  { Builds a UnicodeString of length 1 holding the character c. }
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a single UnicodeChar to an AnsiString in code page cp using
    the installed Unicode2AnsiMoveProc. CP_ACP is replaced by
    DefaultSystemCodePage. Without FPC_HAS_CPSTRING the code page is
    always DefaultSystemCodePage.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    if cp=CP_ACP then
      cp:=DefaultSystemCodePage;
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, cp, 1);
  end;
  procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  {
    Converts a UnicodeChar to a ShortString using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,converted,DefaultSystemCodePage,1);
    res:=converted;
  end;
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts a #0-terminated PChar to a UnicodeString using the
    installed Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    A nil pointer or an empty string yields ''.
  }
  var
    bytecount : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { position of the terminating #0 = number of bytes }
        bytecount:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(p,DefaultSystemCodePage,Result,bytecount);
      end
    else
      Result:='';
  end;
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a char array to a UnicodeString using the installed
    Ansi2UnicodeMoveProc and DefaultSystemCodePage.
    With zerobased set, conversion stops at the first #0 in the array;
    otherwise the whole array is converted.
  }
  var
    bytecount : SizeInt;
  begin
    { default: the complete array }
    bytecount:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            Result:='';
            exit;
          end;
        bytecount:=IndexChar(arr,high(arr)+1,#0);
        { no terminator found: take everything }
        if bytecount=-1 then
          bytecount:=high(arr)+1;
      end;
    SetLength(Result,bytecount);
    widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,Result,bytecount);
  end;
  procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a unicodechar array to a ShortString using the installed
    Unicode2AnsiMoveProc and DefaultSystemCodePage.
    At most high(res) characters are converted. With zerobased set,
    conversion stops at the first #0 found within that range.
  }
  var
    limit : longint;
    zeropos : ptrint;
    count : byte;
    converted : ansistring;
  begin
    { clamp the number of usable characters to 0..high(res) }
    limit:=high(arr)+1;
    if limit>high(res) then
      limit:=high(res)
    else if limit<0 then
      limit:=0;
    count:=limit;
    if zerobased then
      begin
        zeropos:=IndexWord(arr[0],limit,0);
        if zeropos>=0 then
          count:=zeropos;
      end;
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),converted,DefaultSystemCodePage,count);
    res:=converted;
  end;
  Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING}zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts a unicodechar array to an AnsiString in code page cp using
    the installed Unicode2AnsiMoveProc.
    With zerobased set, conversion stops at the first #0 in the array;
    otherwise the whole array is converted.
    CP_ACP is replaced by DefaultSystemCodePage, matching
    fpc_UnicodeStr_To_AnsiStr and fpc_UChar_To_AnsiStr.
    Without FPC_HAS_CPSTRING the code page is always
    DefaultSystemCodePage.
  }
  var
    charcount,zeropos : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    if cp=CP_ACP then
      cp:=DefaultSystemCodePage;
    { default: the complete array }
    charcount:=high(arr)+1;
    if zerobased then
      begin
        zeropos:=IndexWord(arr,high(arr)+1,0);
        if zeropos<>-1 then
          charcount:=zeropos;
      end;
    SetLength(Result,charcount);
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),Result,cp,charcount);
  end;
  Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a unicodechar array into a new UnicodeString.
    With zerobased set, copying stops at the first #0 in the array;
    otherwise the whole array is copied.
  }
  var
    charcount,zeropos : SizeInt;
  begin
    { default: the complete array }
    charcount:=high(arr)+1;
    if zerobased then
      begin
        zeropos:=IndexWord(arr,high(arr)+1,0);
        if zeropos<>-1 then
          charcount:=zeropos;
      end;
    SetLength(Result,charcount);
    Move(arr[0], Pointer(Result)^,charcount*sizeof(UnicodeChar));
  end;
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a widechar array into a new UnicodeString.
    With zerobased set, copying stops at the first #0 in the array;
    otherwise the whole array is copied.
  }
  var
    charcount,zeropos : SizeInt;
  begin
    { default: the complete array }
    charcount:=high(arr)+1;
    if zerobased then
      begin
        zeropos:=IndexWord(arr,high(arr)+1,0);
        if zeropos<>-1 then
          charcount:=zeropos;
      end;
    SetLength(Result,charcount);
    Move(arr[0], Pointer(Result)^,charcount*sizeof(WideChar));
  end;
  700. { due to their names, the following procedures should be in wstrings.inc,
  701. however, the compiler generates code using this functions on all platforms }
  { Compiler helper: converts an open array of WideChar into a ShortString.
    At most high(res) source characters are considered.  When zerobased is
    true the conversion also stops at the first #0 inside that range.  The
    characters are converted with the wide string manager using
    DefaultSystemCodePage and the converted text is assigned to res. }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  var
    maxchars : longint;
    nulpos   : ptrint;
    len      : byte;
    temp     : ansistring;
  begin
    { number of source characters, limited to what res can hold }
    maxchars:=high(arr)+1;
    if maxchars>=high(res)+1 then
      maxchars:=high(res)
    else if maxchars<0 then
      maxchars:=0;
    len:=maxchars;
    if zerobased then
      begin
        nulpos:=IndexWord(arr[0],maxchars,0);
        if nulpos>=0 then
          len:=nulpos;
      end;
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),temp,DefaultSystemCodePage,len);
    res:=temp;
  end;
  { Compiler helper: converts an open array of WideChar into an AnsiString.
    arr       - source characters.
    cp        - target code page; when FPC_HAS_CPSTRING is not defined it is a
                local that is set to DefaultSystemCodePage.
    zerobased - when true, conversion stops at the first #0 element, if any;
                otherwise the whole array is converted. }
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  var
    charcount,
    nulpos    : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp        : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    charcount:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,charcount,0);
        if nulpos<>-1 then
          charcount:=nulpos;
      end;
    SetLength(fpc_WideCharArray_To_AnsiStr,charcount);
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,cp,charcount);
  end;
  { Compiler helper: copies an open array of WideChar into a WideString.
    When zerobased is true the copy stops at the first #0 element, if any;
    otherwise every element of arr is copied. }
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  var
    charcount,
    nulpos    : SizeInt;
  begin
    charcount:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,charcount,0);
        if nulpos<>-1 then
          charcount:=nulpos;
      end;
    SetLength(fpc_WideCharArray_To_WideStr,charcount);
    Move(arr[0],Pointer(fpc_WideCharArray_To_WideStr)^,charcount*sizeof(WideChar));
  end;
  { Compiler helper: stores a UnicodeString into an open array of char.
    src is converted with the wide string manager using
    DefaultSystemCodePage.  As many converted bytes as fit are copied to
    res and the remainder of res is filled with #0. }
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  var
    copylen : SizeInt;
    temp    : ansistring;
  begin
    { only touch src[1] when the string is not empty (it may be nil) }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,DefaultSystemCodePage,length(src));
    copylen:=length(temp);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    move(temp[1],res[0],copylen);
    fillchar(res[copylen],length(res)-copylen,0);
  {$pop}
  end;
  { Compiler helper: stores a UnicodeString into an open array of
    UnicodeChar.  As many characters as fit are copied and the remainder
    of res is filled with zeroes. }
  procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  var
    copylen : SizeInt;
  begin
    copylen:=length(src);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    { do not access element 1 of the unicodestring when it is empty (nil) }
    if copylen>0 then
      move(src[1],res[0],copylen*SizeOf(UnicodeChar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  { Compiler helper: stores an AnsiString into an open array of
    UnicodeChar.  src is converted with the wide string manager using
    DefaultSystemCodePage.  As many converted characters as fit are copied
    and the remainder of res is filled with zeroes. }
  procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  var
    copylen : SizeInt;
    temp    : unicodestring;
  begin
    { only touch src[1] when the string is not empty (it may be nil) }
    if length(src)>0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,length(src));
    copylen:=length(temp);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    move(temp[1],res[0],copylen*sizeof(unicodechar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  { Compiler helper: stores a ShortString into an open array of
    UnicodeChar.  src is converted with the wide string manager using
    DefaultSystemCodePage.  As many converted characters as fit are copied
    and the remainder of res is filled with zeroes. }
  procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  var
    copylen : longint;
    temp    : unicodestring;
  begin
    { do not access character 1 of an empty string }
    if length(src)>0 then
      widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,length(src));
    copylen:=length(temp);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    move(temp[1],res[0],copylen*sizeof(unicodechar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(UnicodeChar),0);
  {$pop}
  end;
  { Compiler helper: stores an ansi string into an open array of WideChar.
    src is converted with the wide string manager using the code page
    reported by StringCodePage(src).  As many converted characters as fit
    are copied and the remainder of res is filled with zeroes. }
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  var
    copylen : SizeInt;
    temp    : widestring;
  begin
    { only touch src[1] when the string is not empty (it may be nil) }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),temp,length(src));
    copylen:=length(temp);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    move(temp[1],res[0],copylen*sizeof(widechar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  { Compiler helper: stores a ShortString into an open array of WideChar.
    src is converted with the wide string manager using
    DefaultSystemCodePage.  As many converted characters as fit are copied
    and the remainder of res is filled with zeroes. }
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  var
    copylen : longint;
    temp    : widestring;
  begin
    { do not access character 1 of an empty string }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,length(src));
    copylen:=length(temp);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    move(temp[1],res[0],copylen*sizeof(widechar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  { Compiler helper: stores a UnicodeString into an open array of WideChar.
    As many characters as fit are copied and the remainder of res is
    filled with zeroes. }
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  var
    copylen : SizeInt;
  begin
    copylen:=length(src);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    { do not access element 1 of the unicodestring when it is empty (nil) }
    if copylen>0 then
      move(src[1],res[0],copylen*SizeOf(WideChar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  { Compiler helper: three-way comparison of two UnicodeStrings.
    Returns  <0 if S1<S2,  0 if S1=S2,  >0 if S1>S2.
    The common prefix is compared with CompareWord; when it is equal the
    difference of the lengths decides. }
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  var
    common,
    outcome : SizeInt;
  begin
    { identical pointers (including two empty strings) are always equal }
    if pointer(S1)=pointer(S2) then
      exit(0);
    { length of the part both strings have in common }
    common:=Length(S1);
    if Length(S2)<common then
      common:=Length(S2);
    outcome:=CompareWord(S1[1],S2[1],common);
    if outcome=0 then
      outcome:=Length(S1)-Length(S2);
    fpc_UnicodeStr_Compare:=outcome;
  end;
  { Compiler helper: equality-only comparison of two UnicodeStrings.
    Returns 0 if S1=S2 and a non-zero value otherwise (-1 when the lengths
    differ, else the CompareWord result). }
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  var
    len : SizeInt;
  begin
    { identical pointers (including two empty strings) are always equal }
    if pointer(S1)=pointer(S2) then
      exit(0);
    len:=Length(S1);
    if len=Length(S2) then
      fpc_UnicodeStr_Compare_Equal:=CompareWord(S1[1],S2[1],len)
    else
      fpc_UnicodeStr_Compare_Equal:=-1;
  end;
  926. {$ifdef VER2_4}
  927. // obsolete but needed for bootstrapping with 2.4
  { Compiler helper (VER2_4 bootstrap only): raises run-time error 201 when
    the string pointer p is nil. }
  Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  { Compiler helper (VER2_4 bootstrap only): raises run-time error 201 when
    index lies outside 1..len. }
  Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  begin
    if (index<1) or (index>len) then
      HandleErrorFrame(201,get_frame);
  end;
  938. {$else VER2_4}
  { Compiler helper: range check for indexing a UnicodeString.
    p is the string data pointer.  Run-time error 201 is raised when p is
    nil or when index lies outside 1..length stored in the string header. }
  Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  begin
    { the nil test must stay first: the header is only read for a non-nil p }
    if (p=nil) or (index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorFrame(201,get_frame);
  end;
  944. {$endif VER2_4}
  { Compiler helper: sets the length of S to l characters.
    l<=0             : S is released and becomes nil.
    S is nil         : a new string of l characters is allocated.
    reference count 1: the existing block is reused and only reallocated
                       when it is too small, or larger than 32 bytes and at
                       least twice the needed size.
    otherwise        : a new string is allocated, the old contents (up to
                       l characters, or all plus terminator) are copied and
                       the reference to the old string is dropped.
    On exit a non-empty S is zero terminated and its stored length is l. }
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  var
    fresh    : Pointer;
    movelen,
    oldlen   : SizeInt;
    cursize,
    needsize : SizeUInt;
  begin
    if l<=0 then
      begin
        { requested length 0: drop the string }
        if Pointer(S)<>nil then
          fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=nil;
        exit;
      end;

    if Pointer(S)=nil then
      { nothing to preserve, allocate a complete new string }
      Pointer(S):=NewUnicodeString(l)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: resize the existing memory block in place }
        Dec(Pointer(S),UnicodeFirstOff);
        cursize:=MemSize(Pointer(S));
        needsize:=SizeUInt(l*sizeof(UnicodeChar)+UnicodeRecLen);
        if (needsize>cursize) or
           ((cursize>32) and (needsize<=(cursize div 2))) then
          reallocmem(pointer(S),needsize);
        Inc(Pointer(S),UnicodeFirstOff);
      end
    else
      begin
        { string is not exclusively ours: build a private copy }
        fresh:=Pointer(NewUnicodeString(l));
        oldlen:=length(S);
        if oldlen>0 then
          begin
            { copy the old text; include the terminating null when it fits }
            if l<succ(oldlen) then
              movelen:=l
            else
              movelen:=succ(oldlen);
            Move(Pointer(S)^,fresh^,movelen*Sizeof(UnicodeChar));
          end;
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=fresh;
      end;

    { force termination (the string may have become shorter) and store the length }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  end;
  1000. {*****************************************************************************
  1001. Public functions, In interface.
  1002. *****************************************************************************}
  { Returns the zero-terminated UnicodeChar text at S as a UnicodeString.
    The length is obtained by converting S to a UnicodeString first. }
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  var
    charcount : SizeInt;
  begin
    charcount:=Length(UnicodeString(S));
    result:=UnicodeCharLenToString(S,charcount);
  end;
  { Converts Src to UTF-16 and stores it, zero terminated, in the buffer Dest.
    Src      - source string; converted from the code page reported by
               StringCodePage(Src) through the wide string manager.
    Dest     - destination buffer.
    DestSize - capacity of Dest in UnicodeChars, including the terminator.
    Returns Dest.
    At most DestSize-1 characters are copied and the terminating #0 is
    written directly after the last copied character.  When Dest is nil or
    DestSize<=0 nothing is written. }
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  var
    temp : unicodestring;
    len  : SizeInt;
  begin
    result:=Dest;
    { no room for even the terminator: leave the buffer untouched }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    len:=Length(temp);
    { keep one slot free for the terminator }
    if len>=DestSize then
      len:=DestSize-1;
    if len>0 then
      move(Pointer(temp)^,Dest^,len*SizeOf(UnicodeChar));
    Dest[len]:=#0;
  end;
  { Returns the zero-terminated WideChar text at S as a UnicodeString.
    The length is obtained by converting S to a WideString first. }
  function WideCharToString(S : PWideChar) : UnicodeString;
  var
    charcount : SizeInt;
  begin
    charcount:=Length(WideString(S));
    result:=WideCharLenToString(S,charcount);
  end;
  { Converts Src to wide characters and stores it, zero terminated, in Dest.
    Src      - source string; converted from the code page reported by
               StringCodePage(Src) through the wide string manager.
    Dest     - destination buffer.
    DestSize - capacity of Dest in WideChars, including the terminator.
    Returns Dest.
    At most DestSize-1 characters are copied and the terminating #0 is
    written directly after the last copied character.  When Dest is nil or
    DestSize<=0 nothing is written. }
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  var
    temp : widestring;
    len  : SizeInt;
  begin
    result:=Dest;
    { no room for even the terminator: leave the buffer untouched }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    len:=Length(temp);
    { keep one slot free for the terminator }
    if len>=DestSize then
      len:=DestSize-1;
    if len>0 then
      move(Pointer(temp)^,Dest^,len*SizeOf(WideChar));
    Dest[len]:=#0;
  end;
  { Returns a UnicodeString holding the first Len UnicodeChars found at S.
    The characters are copied as-is; no terminator search is done. }
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  begin
    SetLength(result,Len);
    Move(S^,Pointer(result)^,Len*SizeOf(UnicodeChar));
  end;
  { Stores the first Len UnicodeChars found at Src in the UnicodeString Dest. }
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharLenToString(Src,Len);
    Dest:=converted;
  end;
  { Stores the first Len UnicodeChars found at Src in the AnsiString Dest;
    the UnicodeString to AnsiString conversion happens on assignment. }
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharLenToString(Src,Len);
    Dest:=converted;
  end;
  { Stores the zero-terminated UnicodeChar text at S in the AnsiString Dest;
    the UnicodeString to AnsiString conversion happens on assignment. }
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharToString(S);
    Dest:=converted;
  end;
  { Returns a UnicodeString holding the first Len WideChars found at S.
    The characters are copied as-is; no terminator search is done. }
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  begin
    SetLength(result,Len);
    Move(S^,Pointer(result)^,Len*SizeOf(WideChar));
  end;
  { Stores the first Len WideChars found at Src in the UnicodeString Dest. }
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  var
    converted : UnicodeString;
  begin
    converted:=WideCharLenToString(Src,Len);
    Dest:=converted;
  end;
  { Stores the first Len WideChars found at Src in the AnsiString Dest;
    the UnicodeString to AnsiString conversion happens on assignment. }
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  var
    converted : UnicodeString;
  begin
    converted:=WideCharLenToString(Src,Len);
    Dest:=converted;
  end;
  { Stores the zero-terminated WideChar text at S in the UnicodeString Dest. }
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  var
    converted : UnicodeString;
  begin
    converted:=WideCharToString(S);
    Dest:=converted;
  end;
  { Stores the zero-terminated WideChar text at S in the AnsiString Dest;
    the UnicodeString to AnsiString conversion happens on assignment. }
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  var
    converted : UnicodeString;
  begin
    converted:=WideCharToString(S);
    Dest:=converted;
  end;
  { Compiler helper: makes sure the string S points to is not shared.
    When S is nil or its reference count is 1 nothing changes.  Otherwise
    a new string with the same contents (including the terminator) is
    allocated, the reference to the old one is dropped and S is redirected
    to the copy.  Returns the resulting value of S. }
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  var
    duplicate : Pointer;
    charcount : SizeInt;
  begin
    pointer(result):=pointer(S);
    { the nil test must stay first: the header is only read for a non-nil S }
    if (Pointer(S)=nil) or
       (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1) then
      exit;
    charcount:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
    duplicate:=NewUnicodeString(charcount);
    { charcount+1: the terminating null is copied as well }
    Move(PUnicodeChar(S)^,duplicate^,(charcount+1)*sizeof(UnicodeChar));
    PUnicodeRec(duplicate-UnicodeFirstOff)^.len:=charcount;
    fpc_unicodestr_decr_ref(Pointer(S));
    pointer(S):=duplicate;
    pointer(result):=duplicate;
  end;
  { Compiler helper behind Copy for UnicodeStrings.
    Returns up to Size characters of S starting at the 1-based position
    Index.  An Index below 1 is treated as 1; Size is reduced so that the
    copy never runs past the end of S.  When nothing is left to copy the
    result is an empty string. }
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  var
    fresh  : Pointer;
    srclen : SizeInt;
  begin
    fresh:=nil;
    srclen:=Length(S);
    { convert to a 0-based offset that is never negative }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Both tests are needed: Size may be maxint, in which case Index+Size
      wraps around to a negative value.  This also covers an empty S. }
    if (Size>srclen) or (Index+Size>srclen) then
      Size:=srclen-Index;
    if Size>0 then
      begin
        fresh:=Pointer(NewUnicodeString(Size));
        if fresh<>nil then
          begin
            Move(PUnicodeChar(S)[Index],fresh^,Size*sizeof(UnicodeChar));
            PUnicodeRec(fresh-UnicodeFirstOff)^.Len:=Size;
            PUnicodeChar(fresh+Size*sizeof(UnicodeChar))^:=#0;
          end;
      end;
    { release whatever the result held before installing the new string }
    fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
    Pointer(fpc_unicodestr_Copy):=fresh;
  end;
  { Returns the 1-based position of the first occurrence of Substr in
    Source, or 0 when Substr is empty or does not occur. }
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  var
    start,
    laststart : SizeInt;
    cursor    : punicodechar;
  begin
    Pos:=0;
    if Length(SubStr)<=0 then
      exit;
    { last 1-based position at which Substr still fits into Source }
    laststart:=Length(Source)-Length(SubStr)+1;
    cursor:=@Source[1];
    for start:=1 to laststart do
      begin
        { cheap first-character test before the full comparison }
        if (SubStr[1]=cursor^) and
           (CompareWord(SubStr[1],cursor^,Length(SubStr))=0) then
          begin
            Pos:=start;
            exit;
          end;
        inc(cursor);
      end;
  end;
  { Faster variant for a single UnicodeChar: returns the 1-based position
    of the first occurrence of c in s, or 0 when c does not occur. }
  Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  var
    idx,
    total  : SizeInt;
    cursor : punicodechar;
  begin
    Pos:=0;
    cursor:=@s[1];
    total:=length(s);
    idx:=1;
    while idx<=total do
      begin
        if cursor^=c then
          begin
            Pos:=idx;
            exit;
          end;
        inc(cursor);
        inc(idx);
      end;
  end;
  { Converts the ansi string c to a UnicodeString and returns its position
    in s as reported by the UnicodeString overload of Pos. }
  Function Pos (c : RawByteString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    exit(Pos(UnicodeString(c),s));
  end;
  { Converts the short string c to a UnicodeString and returns its position
    in s as reported by the UnicodeString overload of Pos. }
  Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    exit(Pos(UnicodeString(c),s));
  end;
  { Converts the ansi string s to a UnicodeString and returns the position
    of c in it as reported by the UnicodeString overload of Pos. }
  Function Pos (c : UnicodeString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    exit(Pos(c,UnicodeString(s)));
  end;
  { Faster variant for a single char.  It has to exist because
    pos(c: char; const s: shortstring) exists as well: without it a call
    pos(char,pchar) would always pick the shortstring version (exact match
    on the first argument), even with $h+ (JM).
    Returns the 1-based position of the first occurrence of c (converted
    to a UnicodeChar) in s, or 0 when it does not occur. }
  Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  var
    idx,
    total  : SizeInt;
    wanted : unicodechar;
    cursor : punicodechar;
  begin
    Pos:=0;
    wanted:=c;
    cursor:=@s[1];
    total:=length(s);
    idx:=1;
    while idx<=total do
      begin
        if cursor^=wanted then
          begin
            Pos:=idx;
            exit;
          end;
        inc(cursor);
        inc(idx);
      end;
  end;
  { Removes Size characters from S starting at the 1-based position Index.
    Nothing happens when Index lies outside 1..Length(S) or Size<=0.  A
    Size that reaches past the end of S removes everything from Index on. }
  Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  var
    curlen,
    tail   : SizeInt;
  begin
    curlen:=Length(S);
    if (Index<=0) or (Index>curlen) or (Size<=0) then
      exit;
    UniqueString(S);
    { number of characters that follow position Index }
    tail:=curlen-Index;
    { compare against tail instead of Index+Size: the sum overflows when
      Size=MaxInt }
    if Size>tail then
      { deleting up to the end: truncating below is enough }
      Size:=tail+1
    else
      { shift the remainder, including the terminating null, to the left }
      Move(PUnicodeChar(S)[Index-1+Size],PUnicodeChar(S)[Index-1],(tail-Size+2)*sizeof(UnicodeChar));
    SetLength(S,curlen-Size);
  end;
  { Inserts Source into S in front of the 1-based position Index.
    An empty Source leaves S untouched.  Index is clamped to the range
    1..Length(S)+1, so too large a value appends Source to S. }
  Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  var
    Temp   : UnicodeString;
    srclen,
    oldlen : SizeInt;
  begin
    srclen:=Length(Source);
    if srclen=0 then
      exit;
    oldlen:=Length(S);
    { clamp Index and turn it into a 0-based offset }
    if Index<=0 then
      Index:=1;
    if Index>oldlen then
      Index:=oldlen+1;
    Dec(Index);
    { assemble the result in a fresh string: head, Source, tail }
    Pointer(Temp):=NewUnicodeString(srclen+oldlen);
    SetLength(Temp,srclen+oldlen);
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],srclen*sizeof(UnicodeChar));
    if (oldlen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Temp)[srclen+Index],(oldlen-Index)*sizeof(UnicodeChar));
    S:=Temp;
  end;
  { Returns the upper case form of c: c is placed in a one character
    string, passed to the wide string manager's UpperUnicodeStringProc and
    the first character of the answer is returned. }
  Function UpCase(c:UnicodeChar):UnicodeChar;
  var
    single,
    upper  : UnicodeString;
  begin
    single:=c;
    upper:=widestringmanager.UpperUnicodeStringProc(single);
    result:=upper[1];
  end;
  { Returns s as converted by the wide string manager's
    UpperUnicodeStringProc. }
  function UpCase(const s : UnicodeString) : UnicodeString;
  begin
    exit(widestringmanager.UpperUnicodeStringProc(s));
  end;
  { Sets S to a string of Len characters.  When Buf is not nil and Len>0
    the characters are copied from Buf; otherwise only the length is set. }
  Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    if (Buf=nil) or (Len<=0) then
      exit;
    Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  { Sets S from Len single-byte characters at Buf.
    S is first given a length of Len.  When Buf is not nil and Len>0 the
    characters are then converted into S by the wide string manager using
    DefaultSystemCodePage; otherwise only the length is set. }
  Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  begin
    SetLength(S,Len);
    If (Buf<>Nil) and (Len>0) then
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1266. {$ifndef FPUNONE}
  { Compiler helper behind Val for a real destination and a UnicodeString
    source.  Strings longer than 255 characters are rejected: the result
    is 0 and Code is set to 256.  Otherwise S is copied to a short string
    and parsed by Val. }
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  var
    buf : String;
  begin
    fpc_Val_Real_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=S;
        Val(buf,fpc_Val_Real_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  1280. {$endif}
  { Compiler helper behind Val for an enumeration destination and a
    UnicodeString source.  Strings longer than 255 characters are
    rejected: the result is 0 and Code is set to 256.  Otherwise s is
    copied to a short string and parsed by Val.
    NOTE(review): str2ordindex is not used here and the short string is
    parsed as a plain longint; confirm whether the enumeration name lookup
    was meant to be applied. }
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  var
    ss : shortstring;
  begin
    { defined result on the error path, like the other Val helpers }
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=s;
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  { Compiler helper behind Val for a Currency destination and a
    UnicodeString source.  Strings longer than 255 characters are
    rejected: the result is 0 and Code is set to 256.  Otherwise S is
    copied to a short string and parsed by Val. }
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  var
    buf : String;
  begin
    if length(S)<=255 then
      begin
        buf:=S;
        Val(buf,fpc_Val_Currency_UnicodeStr,code);
      end
    else
      begin
        fpc_Val_Currency_UnicodeStr:=0;
        code:=256;
      end;
  end;
  { Compiler helper behind Val for an unsigned destination and a
    UnicodeString source.  Strings longer than 255 characters are
    rejected: the result is 0 and Code is set to 256.  Otherwise S is
    copied to a short string and parsed by Val. }
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  var
    buf : ShortString;
  begin
    fpc_Val_UInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=S;
        Val(buf,fpc_Val_UInt_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  { Compiler helper behind Val for a signed destination and a
    UnicodeString source.  Strings longer than 255 characters are
    rejected: the result is 0 and Code is set to 256.  Otherwise S is
    copied to a short string and handed, together with DestSize, to
    int_Val_SInt_ShortStr. }
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  var
    buf : ShortString;
  begin
    fpc_Val_SInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=S;
        fpc_Val_SInt_UnicodeStr:=int_Val_SInt_ShortStr(DestSize,buf,Code);
      end
    else
      code:=256;
  end;
  1333. {$ifndef CPU64}
  { Compiler helper behind Val for a qword destination and a UnicodeString
    source (only compiled when CPU64 is not defined).  Strings longer than
    255 characters are rejected: the result is 0 and Code is set to 256.
    Otherwise S is copied to a short string and parsed by Val. }
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  var
    buf : ShortString;
  begin
    fpc_Val_qword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=S;
        Val(buf,fpc_Val_qword_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  { Compiler helper behind Val for an Int64 destination and a UnicodeString
    source (only compiled when CPU64 is not defined).  Strings longer than
    255 characters are rejected: the result is 0 and Code is set to 256.
    Otherwise S is copied to a short string and parsed by Val. }
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  var
    buf : ShortString;
  begin
    fpc_Val_int64_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=S;
        Val(buf,fpc_Val_int64_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  1360. {$endif CPU64}
  1361. {$ifndef FPUNONE}
  { Compiler helper behind Str for a real value and a UnicodeString
    destination: the value is formatted into a short string by str_real
    (rt is passed on as treal_type) and then assigned to s. }
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  var
    formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),formatted);
    s:=formatted;
  end;
  1369. {$endif}
  { Compiler helper behind Str for an enumeration value and a
    UnicodeString destination: all arguments are forwarded to
    fpc_shortstr_enum and the resulting short string is assigned to s. }
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  var
    formatted : shortstring;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,formatted);
    s:=formatted;
  end;
  { Compiler helper behind Str for a boolean value and a UnicodeString
    destination: the value is formatted by fpc_shortstr_bool and the
    resulting short string is assigned to s. }
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  var
    formatted : shortstring;
  begin
    fpc_shortstr_bool(b,len,formatted);
    s:=formatted;
  end;
  1382. {$ifdef FPC_HAS_STR_CURRENCY}
  { Compiler helper behind Str for a Currency value and a UnicodeString
    destination: the value is formatted with Str(c:len:fr) into a short
    string which is then assigned to s. }
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  var
    formatted : shortstring;
  begin
    str(c:len:fr,formatted);
    s:=formatted;
  end;
  1390. {$endif FPC_HAS_STR_CURRENCY}
  { Compiler helper behind Str for a signed value and a UnicodeString
    destination: the value is formatted with Str(v:Len) into a short
    string which is then assigned to S. }
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  var
    formatted : ShortString;
  begin
    Str(v:Len,formatted);
    S:=formatted;
  end;
  { Compiler helper behind Str for an unsigned value and a UnicodeString
    destination: the value is formatted with Str(v:Len) into a short
    string which is then assigned to S. }
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  var
    formatted : ShortString;
  begin
    Str(v:Len,formatted);
    S:=formatted;
  end;
  1405. {$ifndef CPU64}
  { Compiler helper behind Str for an Int64 value and a UnicodeString
    destination (only compiled when CPU64 is not defined): the value is
    formatted with Str(v:Len) into a short string which is then assigned
    to S. }
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  var
    formatted : ShortString;
  begin
    Str(v:Len,formatted);
    S:=formatted;
  end;
  { Compiler helper behind Str for a Qword value and a UnicodeString
    destination (only compiled when CPU64 is not defined): the value is
    formatted with Str(v:Len) into a short string which is then assigned
    to S. }
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  var
    formatted : ShortString;
  begin
    Str(v:Len,formatted);
    S:=formatted;
  end;
  1420. {$endif CPU64}
  { Converts the UTF-16 code unit, or surrogate pair, found at position
    index of S to UTF-32.
    len receives the number of UTF-16 code units consumed: 2 for a valid
    surrogate pair, 1 in every other case.  A code unit outside the
    surrogate range, or a surrogate that is not part of a valid pair, is
    returned unchanged. }
  function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  var
    lead,
    trail : unicodechar;
  begin
    lead:=S[index];
    { default: the code unit maps to itself (this covers #$0-#$D7FF and
      #$E000-#$FFFF as well as invalid surrogates) }
    result:=UCS4Char(lead);
    len:=1;
    { a high surrogate that is not the last code unit of the string? }
    if (lead>=#$d800) and (lead<=#$dbff) and (index<length(S)) then
      begin
        trail:=S[index+1];
        if (trail>=#$dc00) and (trail<=#$dfff) then
          begin
            { valid pair: combine both halves into one UTF-32 value }
            result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
            len:=2;
          end;
      end;
  end;
  { Converts the zero-terminated UTF-16 text at Source to UTF-8 in Dest.
    The source length is determined by searching for the terminating 0
    word; the work is done by the four-parameter overload.  Returns 0
    when Source is nil. }
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if not assigned(Source) then
      exit(0);
    Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
  end;
  { Converts SourceChars UTF-16 code units at Source to UTF-8.
    Dest          - output buffer, or nil to only count the needed bytes.
    MaxDestBytes  - capacity of Dest in bytes, including the terminator.
    Source        - input text; when nil the result is 0.
    SourceChars   - number of code units to convert.
    Returns the number of bytes produced (or needed, when Dest is nil)
    plus one for the terminating #0.
    When Dest is given, conversion stops as soon as the next sequence no
    longer fits, and the output is zero terminated (the last byte of a
    completely filled buffer is overwritten by the terminator).  With
    MaxDestBytes=0 nothing is written to Dest.
    A high surrogate that is not followed by a low surrogate, and a low
    surrogate on its own, produce no output. }
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  var
    i,j : SizeUInt;
    w : word;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            w:=word(Source[i]);
            case w of
              0..$7f:
                begin
                  { one byte: plain ASCII }
                  Dest[j]:=char(w);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  { two bytes }
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (w shr 6));
                  Dest[j+1]:=char($80 or (w and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  { three bytes }
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (w shr 12));
                  Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                  Dest[j+2]:=char($80 or (w and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                { high surrogate: four bytes when followed by a low surrogate }
                begin
                  if j+3>=MaxDestBytes then
                    break;
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { combine the validated pair directly; no temporary
                        string is needed for this }
                      lw:=(longword(w)-$d800) shl 10 + (longword(word(Source[i+1]))-$dc00) + $10000;
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { terminate the output; a zero-sized buffer has no room for it }
        if MaxDestBytes>0 then
          begin
            if j>SizeUInt(MaxDestBytes-1) then
              j:=MaxDestBytes-1;
            Dest[j]:=#0;
          end;
      end
    else
      begin
        { counting only: same classification as above, nothing is stored }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i<sourcechars-1) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    { one extra for the terminator }
    result:=j+1;
  end;
  { Converts the zero-terminated UTF-8 text at Source to UTF-16 in Dest.
    The source length is taken from strlen(Source); the work is done by
    the four-parameter overload.  Returns 0 when Source is nil. }
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if not assigned(Source) then
      exit(0);
    Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
  end;
  1559. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1560. const
  1561. UNICODE_INVALID=63;
  1562. var
  1563. InputUTF8: SizeUInt;
  1564. IBYTE: BYTE;
  1565. OutputUnicode: SizeUInt;
  1566. PRECHAR: SizeUInt;
  1567. TempBYTE: BYTE;
  1568. CharLen: SizeUint;
  1569. LookAhead: SizeUInt;
  1570. UC: SizeUInt;
  1571. begin
  1572. if not assigned(Source) then
  1573. begin
  1574. result:=0;
  1575. exit;
  1576. end;
  1577. result:=SizeUInt(-1);
  1578. InputUTF8:=0;
  1579. OutputUnicode:=0;
  1580. PreChar:=0;
  1581. if Assigned(Dest) Then
  1582. begin
  1583. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1584. begin
  1585. IBYTE:=byte(Source[InputUTF8]);
  1586. if (IBYTE and $80) = 0 then
  1587. begin
  1588. //One character US-ASCII, convert it to unicode
  1589. if IBYTE = 10 then
  1590. begin
  1591. If (PreChar<>13) and FALSE then
  1592. begin
  1593. //Expand to crlf, conform UTF-8.
  1594. //This procedure will break the memory alocation by
  1595. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1596. if OutputUnicode+1<MaxDestChars then
  1597. begin
  1598. Dest[OutputUnicode]:=WideChar(13);
  1599. inc(OutputUnicode);
  1600. Dest[OutputUnicode]:=WideChar(10);
  1601. inc(OutputUnicode);
  1602. PreChar:=10;
  1603. end
  1604. else
  1605. begin
  1606. Dest[OutputUnicode]:=WideChar(13);
  1607. inc(OutputUnicode);
  1608. end;
  1609. end
  1610. else
  1611. begin
  1612. Dest[OutputUnicode]:=WideChar(IBYTE);
  1613. inc(OutputUnicode);
  1614. PreChar:=IBYTE;
  1615. end;
  1616. end
  1617. else
  1618. begin
  1619. Dest[OutputUnicode]:=WideChar(IBYTE);
  1620. inc(OutputUnicode);
  1621. PreChar:=IBYTE;
  1622. end;
  1623. inc(InputUTF8);
  1624. end
  1625. else
  1626. begin
  1627. TempByte:=IBYTE;
  1628. CharLen:=0;
  1629. while (TempBYTE and $80)<>0 do
  1630. begin
  1631. TempBYTE:=(TempBYTE shl 1) and $FE;
  1632. inc(CharLen);
  1633. end;
  1634. //Test for the "CharLen" conforms UTF-8 string
  1635. //This means the 10xxxxxx pattern.
  1636. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1637. begin
  1638. //Insuficient chars in string to decode
  1639. //UTF-8 array. Fallback to single char.
  1640. CharLen:= 1;
  1641. end;
  1642. for LookAhead := 1 to CharLen-1 do
  1643. begin
  1644. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1645. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1646. begin
  1647. //Invalid UTF-8 sequence, fallback.
  1648. CharLen:= LookAhead;
  1649. break;
  1650. end;
  1651. end;
  1652. UC:=$FFFF;
  1653. case CharLen of
  1654. 1: begin
  1655. //Not valid UTF-8 sequence
  1656. UC:=UNICODE_INVALID;
  1657. end;
  1658. 2: begin
  1659. //Two bytes UTF, convert it
  1660. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1661. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1662. if UC <= $7F then
  1663. begin
  1664. //Invalid UTF sequence.
  1665. UC:=UNICODE_INVALID;
  1666. end;
  1667. end;
  1668. 3: begin
  1669. //Three bytes, convert it to unicode
  1670. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1671. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1672. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1673. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1674. begin
  1675. //Invalid UTF-8 sequence
  1676. UC:= UNICODE_INVALID;
  1677. End;
  1678. end;
  1679. 4: begin
  1680. //Four bytes, convert it to two unicode characters
  1681. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1682. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1683. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1684. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1685. if (UC < $10000) or (UC > $10FFFF) then
  1686. begin
  1687. UC:= UNICODE_INVALID;
  1688. end
  1689. else
  1690. begin
  1691. { only store pair if room }
  1692. dec(UC,$10000);
  1693. if (OutputUnicode<MaxDestChars-1) then
  1694. begin
  1695. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1696. inc(OutputUnicode);
  1697. UC:=(UC and $3ff) + $DC00;
  1698. end
  1699. else
  1700. begin
  1701. InputUTF8:= InputUTF8 + CharLen;
  1702. { don't store anything }
  1703. CharLen:=0;
  1704. end;
  1705. end;
  1706. end;
  1707. 5,6,7: begin
  1708. //Invalid UTF8 to unicode conversion,
  1709. //mask it as invalid UNICODE too.
  1710. UC:=UNICODE_INVALID;
  1711. end;
  1712. end;
  1713. if CharLen > 0 then
  1714. begin
  1715. PreChar:=UC;
  1716. Dest[OutputUnicode]:=WideChar(UC);
  1717. inc(OutputUnicode);
  1718. end;
  1719. InputUTF8:= InputUTF8 + CharLen;
  1720. end;
  1721. end;
  1722. Result:=OutputUnicode+1;
  1723. end
  1724. else
  1725. begin
  1726. while (InputUTF8<SourceBytes) do
  1727. begin
  1728. IBYTE:=byte(Source[InputUTF8]);
  1729. if (IBYTE and $80) = 0 then
  1730. begin
  1731. //One character US-ASCII, convert it to unicode
  1732. if IBYTE = 10 then
  1733. begin
  1734. if (PreChar<>13) and FALSE then
  1735. begin
  1736. //Expand to crlf, conform UTF-8.
  1737. //This procedure will break the memory alocation by
  1738. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1739. inc(OutputUnicode,2);
  1740. PreChar:=10;
  1741. end
  1742. else
  1743. begin
  1744. inc(OutputUnicode);
  1745. PreChar:=IBYTE;
  1746. end;
  1747. end
  1748. else
  1749. begin
  1750. inc(OutputUnicode);
  1751. PreChar:=IBYTE;
  1752. end;
  1753. inc(InputUTF8);
  1754. end
  1755. else
  1756. begin
  1757. TempByte:=IBYTE;
  1758. CharLen:=0;
  1759. while (TempBYTE and $80)<>0 do
  1760. begin
  1761. TempBYTE:=(TempBYTE shl 1) and $FE;
  1762. inc(CharLen);
  1763. end;
  1764. //Test for the "CharLen" conforms UTF-8 string
  1765. //This means the 10xxxxxx pattern.
  1766. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1767. begin
  1768. //Insuficient chars in string to decode
  1769. //UTF-8 array. Fallback to single char.
  1770. CharLen:= 1;
  1771. end;
  1772. for LookAhead := 1 to CharLen-1 do
  1773. begin
  1774. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1775. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1776. begin
  1777. //Invalid UTF-8 sequence, fallback.
  1778. CharLen:= LookAhead;
  1779. break;
  1780. end;
  1781. end;
  1782. UC:=$FFFF;
  1783. case CharLen of
  1784. 1: begin
  1785. //Not valid UTF-8 sequence
  1786. UC:=UNICODE_INVALID;
  1787. end;
  1788. 2: begin
  1789. //Two bytes UTF, convert it
  1790. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1791. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1792. if UC <= $7F then
  1793. begin
  1794. //Invalid UTF sequence.
  1795. UC:=UNICODE_INVALID;
  1796. end;
  1797. end;
  1798. 3: begin
  1799. //Three bytes, convert it to unicode
  1800. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1801. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1802. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1803. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1804. begin
  1805. //Invalid UTF-8 sequence
  1806. UC:= UNICODE_INVALID;
  1807. end;
  1808. end;
  1809. 4: begin
  1810. //Four bytes, convert it to two unicode characters
  1811. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1812. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1813. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1814. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1815. if (UC < $10000) or (UC > $10FFFF) then
  1816. UC:= UNICODE_INVALID
  1817. else
  1818. { extra character character }
  1819. inc(OutputUnicode);
  1820. end;
  1821. 5,6,7: begin
  1822. //Invalid UTF8 to unicode conversion,
  1823. //mask it as invalid UNICODE too.
  1824. UC:=UNICODE_INVALID;
  1825. end;
  1826. end;
  1827. if CharLen > 0 then
  1828. begin
  1829. PreChar:=UC;
  1830. inc(OutputUnicode);
  1831. end;
  1832. InputUTF8:= InputUTF8 + CharLen;
  1833. end;
  1834. end;
  1835. Result:=OutputUnicode+1;
  1836. end;
  1837. end;
  { Converts a raw byte (ansi) string to UTF-8.
    The input is first typecast to a UnicodeString and then passed on to the
    UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
    var
      widened : UnicodeString;
    begin
      widened:=UnicodeString(s);
      Result:=UTF8Encode(widened);
    end;
  { Encodes a UTF-16 UnicodeString as UTF-8.
    Returns the empty string when s is empty or when UnicodeToUtf8 reports
    a non-positive count. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
    var
      produced : SizeInt;
      buf : UTF8String;
    begin
      Result:='';
      if s<>'' then
        begin
          { reserve three output bytes for every input code unit }
          SetLength(buf,3*Length(s));
          { the size passed is one larger than the buffer length; presumably this
            accounts for the terminating #0 - verify against UnicodeToUtf8 }
          produced:=UnicodeToUtf8(PChar(buf),Length(buf)+1,PUnicodeChar(s),Length(s));
          if produced>0 then
            begin
              { the reported count is reduced by one before trimming the buffer }
              SetLength(buf,produced-1);
              Result:=buf;
            end;
        end;
    end;
  { Decodes a UTF-8 byte string into a UTF-16 UnicodeString.
    Returns the empty string when s is empty or when Utf8ToUnicode reports
    a non-positive count. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
    var
      produced : SizeInt;
      buf : UnicodeString;
    begin
      Result:='';
      if s<>'' then
        begin
          { one output code unit is reserved per input byte }
          SetLength(buf,Length(s));
          { the size passed is one larger than the buffer length; presumably this
            accounts for the terminating #0 - verify against Utf8ToUnicode }
          produced:=Utf8ToUnicode(PUnicodeChar(buf),Length(buf)+1,PChar(s),Length(s));
          if produced>0 then
            begin
              { the reported count is reduced by one before trimming the buffer }
              SetLength(buf,produced-1);
              Result:=buf;
            end;
        end;
    end;
  { Thin wrapper: returns Utf8Encode(s). }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      AnsiToUtf8:=Utf8Encode(s);
    end;
  { Thin wrapper: returns Utf8Decode(s); the UnicodeString result is converted
    to the RawByteString return type by the assignment. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      Utf8ToAnsi:=Utf8Decode(s);
    end;
  { Converts a UTF-16 UnicodeString into a zero-terminated UCS4String.
    Each step asks utf16toutf32 for the code point at the current position and
    for the number of UTF-16 code units it occupied. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
    var
      srcpos,
      srclen,
      outcount : SizeInt;
      consumed : longint;
    begin
      srclen:=length(s);
      { upper bound: one entry per code unit plus the terminating #0 }
      setlength(result,srclen+1);
      outcount:=0;
      srcpos:=1;
      while srcpos<=srclen do
        begin
          result[outcount]:=utf16toutf32(s,srcpos,consumed);
          inc(srcpos,consumed);
          inc(outcount);
        end;
      { outcount <= srclen because surrogate pairs may have been merged; }
      { keep one extra element for the terminating #0 (dynamic arrays   }
      { are implicitly zero-filled)                                      }
      setlength(result,outcount+1);
    end;
  { Concatenates an utf-32 char to a unicodestring. S *must* be unique when entering.

    nc    : code point to append
    S     : destination buffer; grown when there is no room left at index
            (by 10 characters while shorter than 2560, by length/256 otherwise)
    index : 1-based write position in S; advanced by the number of UTF-16
            code units written (1, or 2 for a surrogate pair)

    Code points up to and including $ffff are stored as a single code unit,
    $10000..$10ffff as a surrogate pair, anything larger as '?'. }
  procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
    var
      p : PUnicodeChar;
      offs : dword;
    begin
      { if nc > $ffff, we need two places }
      if (index+ord(nc > $ffff)>length(s)) then
        if (length(s) < 10*256) then
          setlength(s,length(s)+10)
        else
          setlength(s,length(s)+length(s) shr 8);
      { we know that s is unique -> avoid uniquestring calls}
      p:=@s[index];
      { <= rather than <: $ffff itself is still a single BMP code unit. With <
        it would fall into the surrogate branch, which writes two code units
        although only one place was reserved above. }
      if (nc<=$ffff) then
        begin
          p^:=unicodechar(nc);
          inc(index);
        end
      else if (dword(nc)<=$10ffff) then
        begin
          { split the 20 bit offset into high and low surrogate }
          offs:=dword(nc)-$10000;
          p^:=unicodechar((offs shr 10) + $d800);
          (p+1)^:=unicodechar((offs and $3ff) + $dc00);
          inc(index,2);
        end
      else
        { invalid code point }
        begin
          p^:='?';
          inc(index);
        end;
    end;
  { Converts a zero-terminated UCS4String into a UTF-16 UnicodeString.
    Every element except the last one (the terminating #0) is appended with
    ConcatUTF32ToUnicodeStr. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
    var
      src,
      dst : SizeInt;
    begin
      { initial size: one code unit per code point, terminating #0 skipped }
      SetLength(result,length(s)-1);
      dst:=1;
      src:=0;
      while src<high(s) do
        begin
          ConcatUTF32ToUnicodeStr(s[src],result,dst);
          inc(src);
        end;
      { trim to what was actually written (the buffer may have been grown }
      { to make room for surrogate pairs)                                  }
      setlength(result,dst-1);
    end;
  { Converts a UTF-16 WideString into a zero-terminated UCS4String.
    Each step asks utf16toutf32 for the code point at the current position and
    for the number of UTF-16 code units it occupied. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
    var
      srcpos,
      srclen,
      outcount : SizeInt;
      consumed : longint;
    begin
      srclen:=length(s);
      { upper bound: one entry per code unit plus the terminating #0 }
      setlength(result,srclen+1);
      outcount:=0;
      srcpos:=1;
      while srcpos<=srclen do
        begin
          result[outcount]:=utf16toutf32(s,srcpos,consumed);
          inc(srcpos,consumed);
          inc(outcount);
        end;
      { outcount <= srclen because surrogate pairs may have been merged; }
      { keep one extra element for the terminating #0 (dynamic arrays   }
      { are implicitly zero-filled)                                      }
      setlength(result,outcount+1);
    end;
  { Concatenates an utf-32 char to a widestring. S *must* be unique when entering.

    nc    : code point to append
    S     : destination buffer; grown when there is no room left at index
            (by 10 characters while shorter than 2560, by length/256 otherwise)
    index : 1-based write position in S; advanced by the number of UTF-16
            code units written (1, or 2 for a surrogate pair)

    Code points up to and including $ffff are stored as a single code unit,
    $10000..$10ffff as a surrogate pair, anything larger as '?'. }
  procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
    var
      p : PWideChar;
      offs : dword;
    begin
      { if nc > $ffff, we need two places }
      if (index+ord(nc > $ffff)>length(s)) then
        if (length(s) < 10*256) then
          setlength(s,length(s)+10)
        else
          setlength(s,length(s)+length(s) shr 8);
      { we know that s is unique -> avoid uniquestring calls}
      p:=@s[index];
      { <= rather than <: $ffff itself is still a single BMP code unit. With <
        it would fall into the surrogate branch, which writes two code units
        although only one place was reserved above. }
      if (nc<=$ffff) then
        begin
          p^:=widechar(nc);
          inc(index);
        end
      else if (dword(nc)<=$10ffff) then
        begin
          { split the 20 bit offset into high and low surrogate }
          offs:=dword(nc)-$10000;
          p^:=widechar((offs shr 10) + $d800);
          (p+1)^:=widechar((offs and $3ff) + $dc00);
          inc(index,2);
        end
      else
        { invalid code point }
        begin
          p^:='?';
          inc(index);
        end;
    end;
  { Converts a zero-terminated UCS4String into a UTF-16 WideString.
    Every element except the last one (the terminating #0) is appended with
    ConcatUTF32ToWideStr. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
    var
      src,
      dst : SizeInt;
    begin
      { initial size: one code unit per code point, terminating #0 skipped }
      SetLength(result,length(s)-1);
      dst:=1;
      src:=0;
      while src<high(s) do
        begin
          ConcatUTF32ToWideStr(s[src],result,dst);
          inc(src);
        end;
      { trim to what was actually written (the buffer may have been grown }
      { to make room for surrogate pairs)                                  }
      setlength(result,dst-1);
    end;
  { Messages printed by unimplementedunicodestring. }
  const
    SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';

  { Fallback for unicodestring operations that have no implementation here.
    When console I/O is compiled in and the program is a console application,
    the two messages above are written to StdErr; afterwards run-time
    error 233 is raised via HandleErrorFrame with this routine's frame. }
  procedure unimplementedunicodestring;
    begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
      if IsConsole then
        begin
          Writeln(StdErr,SNoUnicodestrings);
          Writeln(StdErr,SRecompileWithUnicodestrings);
        end;
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
      HandleErrorFrame(233,get_frame);
    end;
  { Returns the ElementSize field of the string header located UnicodeFirstOff
    bytes before the character data, or SizeOf(UnicodeChar) for an empty (nil)
    string. }
  function StringElementSize(const S: UnicodeString): Word; overload;
    begin
      Result:=SizeOf(UnicodeChar);
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
    end;
  { Returns the Ref field of the string header located UnicodeFirstOff bytes
    before the character data, or 0 for an empty (nil) string. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
    begin
      Result:=0;
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
    end;
  { Returns the code page of a UnicodeString.
    With FPC_HAS_CPSTRING defined and a non-empty string, the CodePage field of
    the string header is returned; in every other case DefaultUnicodeCodePage. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
    begin
      Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
      if Pointer(S)<>nil then
        Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
    end;
  { The three stubs below never assign a function result; warnings are
    switched off around them for that reason. Each one only calls
    unimplementedunicodestring. }
  {$warnings off}

  { Placeholder for the upper/lower case conversion of a UnicodeString. }
  function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
    begin
      unimplementedunicodestring;
    end;

  { Placeholder for the comparison of two UnicodeStrings. }
  function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
    begin
      unimplementedunicodestring;
    end;

  { Placeholder for the text comparison of two UnicodeStrings. }
  function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
    begin
      unimplementedunicodestring;
    end;

  {$warnings on}
  { Fills the global widestringmanager record with the default handlers defined
    in this file. Which entries are set depends on HAS_WIDESTRINGMANAGER and
    FPC_WIDESTRING_EQUAL_UNICODESTRING. }
  procedure initunicodestringmanager;
    begin
      with widestringmanager do
        begin
  {$ifndef HAS_WIDESTRINGMANAGER}
          { conversion and case handlers, only when HAS_WIDESTRINGMANAGER is not defined }
          Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
          Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
          UpperUnicodeStringProc:=@GenericUnicodeCase;
          LowerUnicodeStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
          CompareUnicodeStringProc:=@CompareUnicodeString;
          CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
          { the widestring entries get the unicodestring handlers as well }
  {$ifndef HAS_WIDESTRINGMANAGER}
          Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
          Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
          UpperWideStringProc:=@GenericUnicodeCase;
          LowerWideStringProc:=@GenericUnicodeCase;
  {$endif HAS_WIDESTRINGMANAGER}
          CompareWideStringProc:=@CompareUnicodeString;
          CompareTextWideStringProc:=@CompareTextUnicodeString;
          CharLengthPCharProc:=@DefaultCharLengthPChar;
          CodePointLengthProc:=@DefaultCodePointLength;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
          GetStandardCodePageProc:=@DefaultGetStandardCodePage;
        end;
    end;