ustrings.inc 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to :
  19. @-8 : SizeInt for reference count;
  20. @-4 : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  28. Type
  29. PUnicodeRec = ^TUnicodeRec;
  30. TUnicodeRec = Record
  31. CodePage : TSystemCodePage;
  32. ElementSize : Word;
  33. {$ifdef CPU64}
  34. { align fields }
  35. Dummy : DWord;
  36. {$endif CPU64}
  37. Ref : SizeInt;
  38. Len : SizeInt;
  39. end;
  40. Const
  41. UnicodeFirstOff = SizeOf(TUnicodeRec);
  42. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
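{
  The default UnicodeChar <-> Char conversion is a plain value copy:
  when going from Unicode to Ansi every code unit below 256 is copied
  unchanged and all others are translated to '?'; when going from Ansi
  to Unicode every byte is widened to a UnicodeChar of the same value.
  These routines can be overridden for the current locale.
}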
  48. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  49. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  50. procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  51. var
  52. i : SizeInt;
  53. p : PAnsiChar;
  54. begin
  55. setlength(dest,len);
  56. p:=pointer(dest); {SetLength guarantees that dest is unique}
  57. for i:=1 to len do
  58. begin
  59. if word(source^)<256 then
  60. p^:=char(word(source^))
  61. else
  62. p^:='?';
  63. inc(source);
  64. inc(p);
  65. end;
  66. end;
  67. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  68. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  69. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  70. procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  71. var
  72. i : SizeInt;
  73. p : PUnicodeChar;
  74. begin
  75. setlength(dest,len);
  76. p:=pointer(dest); {SetLength guarantees that dest is unique}
  77. for i:=1 to len do
  78. begin
  79. p^:=unicodechar(byte(source^));
  80. inc(source);
  81. inc(p);
  82. end;
  83. end;
  84. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  85. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  86. function DefaultCharLengthPChar(const Str: PChar): PtrInt;
  87. begin
  88. DefaultCharLengthPChar:=length(Str);
  89. end;
  90. function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  91. begin
  92. if str[0]<>#0 then
  93. DefaultCodePointLength:=1
  94. else
  95. DefaultCodePointLength:=0;
  96. end;
  97. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  98. function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  99. begin
  100. { don't raise an exception here. We need this for text file handling }
  101. Result:=DefaultSystemCodePage;
  102. end;
  103. Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  104. begin
  105. manager:=widestringmanager;
  106. end;
  107. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  108. begin
  109. Old:=widestringmanager;
  110. widestringmanager:=New;
  111. end;
  112. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  113. begin
  114. widestringmanager:=New;
  115. end;
  116. Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  117. begin
  118. manager:=widestringmanager;
  119. end;
  120. Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  121. begin
  122. Old:=widestringmanager;
  123. widestringmanager:=New;
  124. end;
  125. Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  126. begin
  127. widestringmanager:=New;
  128. end;
  129. {****************************************************************************
  130. Internal functions, not in interface.
  131. ****************************************************************************}
  132. procedure UnicodeStringError;
  133. begin
  134. HandleErrorFrame(204,get_frame);
  135. end;
  136. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  137. {$define FPC_HAS_NEW_UNICODESTRING}
  138. Function NewUnicodeString(Len : SizeInt) : Pointer;
  139. {
  140. Allocate a new UnicodeString on the heap.
  141. initialize it to zero length and reference count 1.
  142. }
  143. Var
  144. P : Pointer;
  145. begin
  146. GetMem(P,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
  147. If P<>Nil then
  148. begin
  149. PUnicodeRec(P)^.Len:=Len; { Initial length }
  150. PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
  151. PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
  152. PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);
  153. inc(p,UnicodeFirstOff); { Points to string now }
  154. PUnicodeChar(P)^:=#0; { Terminating #0 }
  155. end
  156. else
  157. UnicodeStringError;
  158. NewUnicodeString:=P;
  159. end;
  160. {$endif FPC_HAS_NEW_UNICODESTRING}
  161. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  162. {$define FPC_HAS_UNICODESTR_DECR_REF}
  163. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  164. {
  165. Decreases the ReferenceCount of a non constant unicodestring;
  166. If the reference count is zero, deallocate the string;
  167. }
  168. Var
  169. p: PUnicodeRec;
  170. Begin
  171. { Zero string }
  172. if S=Nil then
  173. exit;
  174. { check for constant strings ...}
  175. p:=PUnicodeRec(S-UnicodeFirstOff);
  176. S:=nil;
  177. if p^.Ref<0 then
  178. exit;
  179. { declocked does a MT safe dec and returns true, if the counter is 0 }
  180. if declocked(p^.Ref) then
  181. FreeMem(p);
  182. end;
  183. { alias for internal use }
  184. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  185. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  186. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  187. {$define FPC_HAS_UNICODESTR_INCR_REF}
  188. Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  189. Begin
  190. If S=Nil then
  191. exit;
  192. { constant string ? }
  193. If PUnicodeRec(S-UnicodeFirstOff)^.Ref<0 then
  194. exit;
  195. inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
  196. end;
  197. { alias for internal use }
  198. Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  199. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  200. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  201. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  202. procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  203. {
  204. Converts a UnicodeString to a ShortString;
  205. }
  206. Var
  207. Size : SizeInt;
  208. temp : ansistring;
  209. begin
  210. res:='';
  211. Size:=Length(S2);
  212. if Size>0 then
  213. begin
  214. If Size>high(res) then
  215. Size:=high(res);
  216. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
  217. res:=temp;
  218. end;
  219. end;
  220. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  221. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  222. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  223. Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  224. {
  225. Converts a ShortString to a UnicodeString;
  226. }
  227. Var
  228. Size : SizeInt;
  229. begin
  230. result:='';
  231. Size:=Length(S2);
  232. if Size>0 then
  233. widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,Size);
  234. end;
  235. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  236. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  237. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  238. Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  239. {
  240. Converts a UnicodeString to an AnsiString
  241. }
  242. Var
  243. Size : SizeInt;
  244. {$ifndef FPC_HAS_CPSTRING}
  245. cp : TSystemCodePage;
  246. {$endif FPC_HAS_CPSTRING}
  247. begin
  248. {$ifndef FPC_HAS_CPSTRING}
  249. cp:=DefaultSystemCodePage;
  250. {$endif FPC_HAS_CPSTRING}
  251. result:='';
  252. Size:=Length(S2);
  253. if Size>0 then
  254. begin
  255. if (cp=CP_ACP) then
  256. cp:=DefaultSystemCodePage;
  257. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,Size);
  258. end;
  259. end;
  260. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  261. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  262. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  263. Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  264. {
  265. Converts an AnsiString to a UnicodeString;
  266. }
  267. Var
  268. Size : SizeInt;
  269. cp: TSystemCodePage;
  270. begin
  271. result:='';
  272. Size:=Length(S2);
  273. if Size>0 then
  274. begin
  275. cp:=StringCodePage(S2);
  276. if (cp=CP_ACP) then
  277. cp:=DefaultSystemCodePage;
  278. widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),cp,result,Size);
  279. end;
  280. end;
  281. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  282. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  283. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  284. Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  285. begin
  286. SetLength(Result,Length(S2));
  287. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  288. end;
  289. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  290. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  291. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  292. Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  293. begin
  294. SetLength(Result,Length(S2));
  295. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  296. end;
  297. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  298. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  299. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  300. Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  301. var
  302. Size : SizeInt;
  303. begin
  304. result:='';
  305. if p=nil then
  306. exit;
  307. Size := IndexWord(p^, -1, 0);
  308. Setlength(result,Size);
  309. if Size>0 then
  310. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  311. end;
  312. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  313. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  314. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  315. Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  316. var
  317. Size : SizeInt;
  318. {$ifndef FPC_HAS_CPSTRING}
  319. cp : TSystemCodePage;
  320. {$endif FPC_HAS_CPSTRING}
  321. begin
  322. {$ifndef FPC_HAS_CPSTRING}
  323. cp:=DefaultSystemCodePage;
  324. {$endif FPC_HAS_CPSTRING}
  325. result:='';
  326. if p=nil then
  327. exit;
  328. Size := IndexWord(p^, -1, 0);
  329. if Size>0 then
  330. widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
  331. end;
  332. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  333. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  334. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  335. procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  336. var
  337. Size : SizeInt;
  338. temp: ansistring;
  339. begin
  340. res:='';
  341. if p=nil then
  342. exit;
  343. Size:=IndexWord(p^, high(PtrInt), 0);
  344. if Size>0 then
  345. begin
  346. widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  347. res:=temp;
  348. end;
  349. end;
  350. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  351. {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  352. {$define FPC_UNICODESTR_ASSIGN}
  353. { checked against the ansistring routine, 2001-05-27 (FK) }
  354. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  355. {
  356. Assigns S2 to S1 (S1:=S2), taking in account reference counts.
  357. }
  358. begin
  359. If S2<>nil then
  360. If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
  361. inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
  362. { Decrease the reference count on the old S1 }
  363. fpc_unicodestr_decr_ref (S1);
  364. s1:=s2;
  365. end;
  366. { alias for internal use }
  367. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  368. {$endif FPC_UNICODESTR_ASSIGN}
  369. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  370. {$define FPC_HAS_UNICODESTR_CONCAT}
  371. procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  372. Var
  373. Size,Location : SizeInt;
  374. same : boolean;
  375. begin
  376. { only assign if s1 or s2 is empty }
  377. if (S1='') then
  378. begin
  379. DestS:=s2;
  380. exit;
  381. end;
  382. if (S2='') then
  383. begin
  384. DestS:=s1;
  385. exit;
  386. end;
  387. Location:=Length(S1);
  388. Size:=length(S2);
  389. { Use Pointer() typecasts to prevent extra conversion code }
  390. if Pointer(DestS)=Pointer(S1) then
  391. begin
  392. same:=Pointer(S1)=Pointer(S2);
  393. SetLength(DestS,Size+Location);
  394. if same then
  395. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size)*sizeof(UnicodeChar))
  396. else
  397. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  398. end
  399. else if Pointer(DestS)=Pointer(S2) then
  400. begin
  401. SetLength(DestS,Size+Location);
  402. Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  403. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  404. end
  405. else
  406. begin
  407. DestS:='';
  408. SetLength(DestS,Size+Location);
  409. Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
  410. Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
  411. end;
  412. end;
  413. {$endif FPC_HAS_UNICODESTR_CONCAT}
  414. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  415. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  416. procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  417. Var
  418. i : Longint;
  419. p,pc : pointer;
  420. Size,NewLen : SizeInt;
  421. lowstart : longint;
  422. destcopy : pointer;
  423. OldDestLen : SizeInt;
  424. begin
  425. if high(sarr)=0 then
  426. begin
  427. DestS:='';
  428. exit;
  429. end;
  430. destcopy:=nil;
  431. lowstart:=low(sarr);
  432. if Pointer(DestS)=Pointer(sarr[lowstart]) then
  433. inc(lowstart);
  434. { Check for another reuse, then we can't use
  435. the append optimization }
  436. for i:=lowstart to high(sarr) do
  437. begin
  438. if Pointer(DestS)=Pointer(sarr[i]) then
  439. begin
  440. { if DestS is used somewhere in the middle of the expression,
  441. we need to make sure the original string still exists after
  442. we empty/modify DestS.
  443. This trick only works with reference counted strings. Therefor
  444. this optimization is disabled for WINLIKEUNICODESTRING }
  445. destcopy:=pointer(dests);
  446. fpc_UnicodeStr_Incr_Ref(destcopy);
  447. lowstart:=low(sarr);
  448. break;
  449. end;
  450. end;
  451. { Start with empty DestS if we start with concatting
  452. the first array element }
  453. if lowstart=low(sarr) then
  454. DestS:='';
  455. OldDestLen:=length(DestS);
  456. { Calculate size of the result so we can do
  457. a single call to SetLength() }
  458. NewLen:=0;
  459. for i:=low(sarr) to high(sarr) do
  460. inc(NewLen,length(sarr[i]));
  461. SetLength(DestS,NewLen);
  462. { Concat all strings, except the string we already
  463. copied in DestS }
  464. pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
  465. for i:=lowstart to high(sarr) do
  466. begin
  467. p:=pointer(sarr[i]);
  468. if assigned(p) then
  469. begin
  470. Size:=length(unicodestring(p));
  471. Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
  472. inc(pc,size*sizeof(UnicodeChar));
  473. end;
  474. end;
  475. fpc_UnicodeStr_Decr_Ref(destcopy);
  476. end;
  477. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  478. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  479. {$define FPC_HAS_CHAR_TO_UCHAR}
  480. Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  481. var
  482. w: unicodestring;
  483. begin
  484. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
  485. fpc_Char_To_UChar:=w[1];
  486. end;
  487. {$endif FPC_HAS_CHAR_TO_UCHAR}
  488. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  489. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  490. Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  491. {
  492. Converts a Char to a UnicodeString;
  493. }
  494. begin
  495. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,result,1);
  496. end;
  497. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  498. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  499. {$define FPC_HAS_UCHAR_TO_CHAR}
  500. Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  501. {
  502. Converts a UnicodeChar to a Char;
  503. }
  504. var
  505. s: ansistring;
  506. begin
  507. widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  508. if length(s)=1 then
  509. fpc_UChar_To_Char:= s[1]
  510. else
  511. fpc_UChar_To_Char:='?';
  512. end;
  513. {$endif FPC_HAS_UCHAR_TO_CHAR}
  514. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  515. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  516. procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  517. {
  518. Converts a WideChar to a ShortString;
  519. }
  520. var
  521. s: ansistring;
  522. begin
  523. widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  524. res:=s;
  525. end;
  526. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  527. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  528. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  529. Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  530. {
  531. Converts a UnicodeChar to a UnicodeString;
  532. }
  533. begin
  534. Setlength (fpc_UChar_To_UnicodeStr,1);
  535. fpc_UChar_To_UnicodeStr[1]:= c;
  536. end;
  537. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  538. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  539. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  540. Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  541. {
  542. Converts a UnicodeChar to a AnsiString;
  543. }
  544. {$ifndef FPC_HAS_CPSTRING}
  545. var
  546. cp : TSystemCodePage;
  547. {$endif FPC_HAS_CPSTRING}
  548. begin
  549. {$ifndef FPC_HAS_CPSTRING}
  550. cp:=DefaultSystemCodePage;
  551. {$endif FPC_HAS_CPSTRING}
  552. if (cp=CP_ACP) then
  553. cp:=DefaultSystemCodePage;
  554. widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
  555. end;
  556. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  557. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  558. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  559. Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  560. Var
  561. L : SizeInt;
  562. begin
  563. if (not assigned(p)) or (p[0]=#0) Then
  564. begin
  565. fpc_pchar_to_unicodestr := '';
  566. exit;
  567. end;
  568. l:=IndexChar(p^,-1,#0);
  569. widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,l);
  570. end;
  571. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  572. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  573. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  574. Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  575. var
  576. i : SizeInt;
  577. begin
  578. if zerobased then
  579. begin
  580. if arr[0]=#0 Then
  581. begin
  582. fpc_chararray_to_unicodestr:='';
  583. exit;
  584. end;
  585. i:=IndexChar(arr,high(arr)+1,#0);
  586. if i=-1 then
  587. i:=high(arr)+1;
  588. end
  589. else
  590. i:=high(arr)+1;
  591. widestringmanager.Ansi2UnicodeMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
  592. end;
  593. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  594. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  595. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  596. Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  597. var
  598. i : SizeInt;
  599. begin
  600. if (zerobased) then
  601. begin
  602. i:=IndexWord(arr,high(arr)+1,0);
  603. if i = -1 then
  604. i := high(arr)+1;
  605. end
  606. else
  607. i := high(arr)+1;
  608. SetLength(fpc_WideCharArray_To_UnicodeStr,i);
  609. Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,i*sizeof(WideChar));
  610. end;
  611. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  612. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  613. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  614. { due to their names, the following procedures should be in wstrings.inc,
  615. however, the compiler generates code using this functions on all platforms }
  616. procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  617. var
  618. l: longint;
  619. index: ptrint;
  620. len: byte;
  621. temp: ansistring;
  622. begin
  623. l := high(arr)+1;
  624. if l>=high(res)+1 then
  625. l:=high(res)
  626. else if l<0 then
  627. l:=0;
  628. if zerobased then
  629. begin
  630. index:=IndexWord(arr[0],l,0);
  631. if index<0 then
  632. len:=l
  633. else
  634. len:=index;
  635. end
  636. else
  637. len:=l;
  638. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,DefaultSystemCodePage,len);
  639. res:=temp;
  640. end;
  641. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  642. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  643. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  644. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  645. var
  646. i : SizeInt;
  647. {$ifndef FPC_HAS_CPSTRING}
  648. cp : TSystemCodePage;
  649. {$endif FPC_HAS_CPSTRING}
  650. begin
  651. {$ifndef FPC_HAS_CPSTRING}
  652. cp:=DefaultSystemCodePage;
  653. {$endif FPC_HAS_CPSTRING}
  654. if (zerobased) then
  655. begin
  656. i:=IndexWord(arr,high(arr)+1,0);
  657. if i = -1 then
  658. i := high(arr)+1;
  659. end
  660. else
  661. i := high(arr)+1;
  662. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),RawByteString(fpc_WideCharArray_To_AnsiStr),cp,i);
  663. end;
  664. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  665. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  666. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies an open array of WideChar into a WideString.
    When zerobased is true the copy stops in front of the first #0 element,
    if any; otherwise all elements are copied.
  }
  var
    count : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        count:=IndexWord(arr,high(arr)+1,0);
        if count=-1 then
          count:=high(arr)+1;
      end;
    SetLength(fpc_WideCharArray_To_WideStr,count);
    Move(arr[0],Pointer(fpc_WideCharArray_To_WideStr)^,count*sizeof(WideChar));
  end;
  682. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  683. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  684. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores it in the char array res.
    The converted text is truncated to length(res); the unused tail of res
    is filled with #0.
  }
  var
    copylen : SizeInt;
    narrow  : ansistring;
  begin
    { src may be nil, so only take the address of src[1] when it is non-empty }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),narrow,DefaultSystemCodePage,length(src));
    copylen:=length(narrow);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    { range checks off: narrow[1] and res[copylen] may lie just outside their bounds }
    move(narrow[1],res[0],copylen);
    fillchar(res[copylen],length(res)-copylen,0);
  {$pop}
  end;
  {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  704. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  705. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src (interpreted in its own StringCodePage) to wide characters
    and stores them in res. The text is truncated to length(res); the unused
    tail of res is filled with zero words.
  }
  var
    copylen : SizeInt;
    wide    : widestring;
  begin
    { src may be nil, so only take the address of src[1] when it is non-empty }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),StringCodePage(src),wide,length(src));
    copylen:=length(wide);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    { range checks off: wide[1] and res[copylen] may lie just outside their bounds }
    move(wide[1],res[0],copylen*sizeof(widechar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  724. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  725. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  726. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts the ShortString src (DefaultSystemCodePage) to wide characters
    and stores them in res. The text is truncated to length(res); the unused
    tail of res is filled with zero words.
  }
  var
    copylen : longint;
    wide    : widestring;
  begin
    { do not touch src[1] when the string is empty }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,wide,length(src));
    copylen:=length(wide);
    if copylen>length(res) then
      copylen:=length(res);
  {$push}
  {$r-}
    { range checks off: wide[1] and res[copylen] may lie just outside their bounds }
    move(wide[1],res[0],copylen*sizeof(widechar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  745. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  746. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  747. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies the characters of src into res without any conversion.
    At most length(res) characters are copied; the unused tail of res is
    filled with zero words.
  }
  var
    copylen : SizeInt;
  begin
    copylen:=length(res);
    if length(src)<=copylen then
      copylen:=length(src);
  {$push}
  {$r-}
    { src[1] must not be touched when src is nil, hence the guard }
    if copylen>0 then
      move(src[1],res[0],copylen*SizeOf(WideChar));
    fillchar(res[copylen],(length(res)-copylen)*SizeOf(WideChar),0);
  {$pop}
  end;
  763. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  764. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  765. {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings.
    Result:
       <0 when S1<S2
        0 when S1=S2
       >0 when S1>S2
    The common prefix is compared with CompareWord; if it is equal, the
    difference of the lengths decides.
  }
  Var
    common,diff : SizeInt;
  begin
    { same string data (this includes two empty strings): equal by definition }
    if pointer(S1)=pointer(S2) then
      exit(0);
    common:=Length(S1);
    if Length(S2)<common then
      common:=Length(S2);
    diff:=CompareWord(S1[1],S2[1],common);
    if diff=0 then
      diff:=Length(S1)-Length(S2);
    fpc_UnicodeStr_Compare:=diff;
  end;
  791. {$endif FPC_HAS_UNICODESTR_COMPARE}
  792. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  793. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings.
    Result:
        0  when S1=S2
      <>0  when S1<>S2
    Strings of different length are reported as different (-1) without
    looking at their contents.
  }
  begin
    if pointer(S1)=pointer(S2) then
      fpc_UnicodeStr_Compare_Equal:=0
    else
      if Length(S1)<>Length(S2) then
        fpc_UnicodeStr_Compare_Equal:=-1
      else
        fpc_UnicodeStr_Compare_Equal:=CompareWord(S1[1],S2[1],Length(S1));
  end;
  812. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  813. {$ifdef VER2_4}
  814. // obsolete but needed for bootstrapping with 2.4
  Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  { Raises run-time error 201 (via HandleErrorFrame) when p is nil. }
  begin
    if not assigned(p) then
      HandleErrorFrame(201,get_frame);
  end;
  Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  { Raises run-time error 201 (via HandleErrorFrame) unless 1<=index<=len. }
  begin
    if (Index<1) or (index>len) then
      HandleErrorFrame(201,get_frame);
  end;
  825. {$else VER2_4}
  826. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  827. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  Procedure fpc_UnicodeStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Range check for indexed access to the unicode string whose data pointer
    is p. Raises run-time error 201 (via HandleErrorFrame) when p is nil or
    when index is not within 1..len, len being read from the string header.
  }
  begin
    { the nil test must come first: the header may only be read for non-nil p }
    if not assigned(p) then
      HandleErrorFrame(201,get_frame)
    else
      if (Index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
        HandleErrorFrame(201,get_frame);
  end;
  833. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  834. {$endif VER2_4}
  835. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  836. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of string S to l.
    Makes sure S is unique and has enough room. For l<=0 the reference held
    by S is released instead.
  }
  Var
    block     : Pointer;
    tocopy    : SizeInt;
    havebytes,
    needbytes : SizeUInt;
  begin
    if l<=0 then
      begin
        { length=0, deallocate the string }
        fpc_unicodestr_decr_ref(Pointer(S));
        exit;
      end;
    if Pointer(S)=nil then
      { nothing there yet: a completely new string is needed }
      Pointer(S):=NewUnicodeString(l)
    else
      if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
        begin
          { reference count is not 1: build a private copy of the requested size }
          block:=NewUnicodeString(l);
          if Length(S)>0 then
            begin
              { copy at most l characters; when everything fits, the
                terminating null is moved as well }
              tocopy:=succ(length(S));
              if l<tocopy then
                tocopy:=l;
              Move(Pointer(S)^,block^,tocopy*Sizeof(UnicodeChar));
            end;
          fpc_unicodestr_decr_ref(Pointer(S));
          Pointer(S):=block;
        end
      else
        begin
          { sole reference: resize in place, but only when the block is too
            small or at least twice as large as needed (and larger than 32 bytes) }
          block:=Pointer(S)-UnicodeFirstOff;
          havebytes:=MemSize(block);
          needbytes:=SizeUInt(l*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
          if (needbytes>havebytes) or ((havebytes>32) and (needbytes<=(havebytes div 2))) then
            begin
              reallocmem(block,needbytes);
              Pointer(S):=block+UnicodeFirstOff;
            end;
        end;
    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  end;
  889. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  890. {*****************************************************************************
  891. Public functions, In interface.
  892. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  { Returns the null-terminated unicode text at S as a UnicodeString.
    The length is obtained by converting S to a UnicodeString first. }
  var
    charcount : SizeInt;
  begin
    charcount:=Length(UnicodeString(S));
    result:=UnicodeCharLenToString(S,charcount);
  end;
  897. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  898. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  { Converts Src into the buffer Dest of DestSize characters by delegating
    to StringToWideChar, and returns what that function returns. }
  begin
    StringToUnicodeChar:=StringToWideChar(Src,Dest,DestSize);
  end;
  903. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  function WideCharToString(S : PWideChar) : UnicodeString;
  { Returns the null-terminated wide text at S as a UnicodeString.
    The length is obtained by converting S to a WideString first. }
  var
    charcount : SizeInt;
  begin
    charcount:=Length(WideString(S));
    result:=WideCharLenToString(S,charcount);
  end;
  908. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  909. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (interpreted in its own StringCodePage) to wide characters
    and stores them, null-terminated, in the buffer Dest.

    Dest     : destination buffer
    DestSize : capacity of Dest in characters, terminator included
    Result   : Dest

    At most DestSize-1 characters are stored and the #0 terminator follows
    the last stored character directly. Previously the terminator was always
    written at Dest[DestSize-1], which left the characters between the text
    and the end of the buffer undefined for short strings, and wrote in
    front of the buffer for DestSize<=0.
    When Dest is nil or DestSize<=0 nothing is written at all.
  }
  var
    temp  : widestring;
    count : SizeInt;
  begin
    result:=Dest;
    { no room for anything, not even for the terminator }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PChar(Src),StringCodePage(Src),temp,Length(Src));
    { keep one slot free for the terminator }
    count:=Length(temp);
    if count>DestSize-1 then
      count:=DestSize-1;
    { temp is nil when nothing was converted, so do not touch it then }
    if count>0 then
      move(Pointer(temp)^,Dest^,count*SizeOf(WideChar));
    Dest[count]:=#0;
  end;
  922. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  923. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  924. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len characters stored at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    { two bytes per UTF-16 code unit }
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*SizeOf(UnicodeChar));
  end;
  930. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len characters found at Src in Dest. }
  begin
    Dest:=UnicodeCharLenToString(Src,Len);
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Same as the UnicodeString variant, with the text converted to AnsiString. }
  var
    wide : UnicodeString;
  begin
    wide:=UnicodeCharLenToString(Src,Len);
    Dest:=AnsiString(wide);
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Stores the null-terminated unicode text at S in Dest, converted to
    AnsiString. }
  var
    wide : UnicodeString;
  begin
    wide:=UnicodeCharToString(S);
    Dest:=AnsiString(wide);
  end;
  943. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  944. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len wide characters stored at S. }
  begin
    SetLength(WideCharLenToString,Len);
    { two bytes per wide character }
    Move(S^,Pointer(WideCharLenToString)^,Len*SizeOf(WideChar));
  end;
  950. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the first Len wide characters found at Src in Dest. }
  begin
    Dest:=WideCharLenToString(Src,Len);
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Same as the UnicodeString variant, with the text converted to AnsiString. }
  var
    wide : UnicodeString;
  begin
    wide:=WideCharLenToString(Src,Len);
    Dest:=AnsiString(wide);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Stores the null-terminated wide text at S in Dest. }
  begin
    Dest:=WideCharToString(S);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Same as the UnicodeString variant, with the text converted to AnsiString. }
  var
    wide : UnicodeString;
  begin
    wide:=WideCharToString(S);
    Dest:=AnsiString(wide);
  end;
  967. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  968. {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Makes sure the reference count of S is 1, using copy-on-write semantics.
    Returns the (possibly new) string pointer, which is also stored in S.
    A nil string is returned unchanged.
  }
  Var
    fresh     : Pointer;
    charcount : SizeInt;
  begin
    result:=S;
    { nil, or already the sole owner: nothing to do }
    if (S=nil) or (PUnicodeRec(S-UnicodeFirstOff)^.Ref=1) then
      exit;
    charcount:=PUnicodeRec(S-UnicodeFirstOff)^.len;
    fresh:=NewUnicodeString(charcount);
    { charcount+1: the terminating null is copied along }
    Move(PUnicodeChar(S)^,fresh^,(charcount+1)*sizeof(UnicodeChar));
    PUnicodeRec(fresh-UnicodeFirstOff)^.len:=charcount;
    fpc_unicodestr_decr_ref(S);  { Thread safe }
    S:=fresh;
    result:=fresh;
  end;
  992. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  993. {$ifndef FPC_HAS_UNICODESTR_COPY}
  994. {$define FPC_HAS_UNICODESTR_COPY}
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns the substring of S that starts at the 1-based position Index and
    is at most Size characters long. An Index below 1 is treated as 1; the
    size is clipped to the end of S. An empty string results when nothing
    remains to be copied.
  }
  var
    fresh  : Pointer;
    srclen : SizeInt;
  begin
    fresh:=Nil;
    srclen:=Length(S);
    { switch to a 0-based offset }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Check Size. Accounts for zero-length S; both tests are needed because
      Size can be maxint, in which case Index+Size becomes negative }
    if (Size>srclen) or (Index+Size>srclen) then
      Size:=srclen-Index;
    if Size>0 then
      begin
        fresh:=NewUnicodeString(Size);
        Move(PUnicodeChar(S)[Index],fresh^,Size*sizeof(UnicodeChar));
        PUnicodeRec(fresh-UnicodeFirstOff)^.Len:=Size;
        PUnicodeChar(fresh+Size*sizeof(UnicodeChar))^:=#0;
      end;
    { release whatever the result variable held before installing the copy }
    fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
    Pointer(fpc_unicodestr_Copy):=fresh;
  end;
  1018. {$endif FPC_HAS_UNICODESTR_COPY}
  1019. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1020. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  { Returns the 1-based position of the first occurrence of Substr in Source,
    or 0 when Substr is empty or does not occur. }
  var
    idx,last : SizeInt;
    cur      : punicodechar;
  begin
    Pos:=0;
    if Length(SubStr)=0 then
      exit;
    { 'last' is the largest 0-based offset at which Substr still fits }
    last:=Length(Source)-Length(SubStr);
    cur:=@Source[1];
    for idx:=1 to last+1 do
      begin
        { cheap first-character test before the full comparison }
        if (SubStr[1]=cur^) and
           (CompareWord(SubStr[1],cur^,Length(SubStr))=0) then
          begin
            Pos:=idx;
            exit;
          end;
        inc(cur);
      end;
  end;
  1045. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1046. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1047. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1048. { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  { Returns the 1-based position of the first occurrence of c in s, or 0
    when c does not occur. }
  var
    idx : SizeInt;
    cur : punicodechar;
  begin
    Pos:=0;
    cur:=@s[1];
    idx:=1;
    while idx<=length(s) do
      begin
        if cur^=c then
          begin
            Pos:=idx;
            exit;
          end;
        inc(cur);
        inc(idx);
      end;
  end;
  1066. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1067. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1068. block, which is significant bloat without any sensible speed improvement. }
  Function Pos (const c : RawByteString; Const s : UnicodeString) : SizeInt;
  { Searches s for c after converting c to UnicodeString. }
  var
    wide : UnicodeString;
  begin
    wide:=UnicodeString(c);
    result:=Pos(wide,s);
  end;
  Function Pos (const c : ShortString; Const s : UnicodeString) : SizeInt;
  { Searches s for c after converting c to UnicodeString. }
  var
    wide : UnicodeString;
  begin
    wide:=UnicodeString(c);
    result:=Pos(wide,s);
  end;
  Function Pos (const c : UnicodeString; Const s : RawByteString) : SizeInt;
  { Searches s for c after converting s to UnicodeString. }
  var
    wide : UnicodeString;
  begin
    wide:=UnicodeString(s);
    result:=Pos(c,wide);
  end;
  1081. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1082. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1083. { Faster version for a char alone. Must be implemented because }
  1084. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1085. { using pos(char,pchar) will always call the shortstring version }
  1086. { (exact match for first argument), also with $h+ (JM) }
  Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  { Returns the 1-based position of the first occurrence of c (widened to a
    unicodechar) in s, or 0 when it does not occur. }
  var
    idx    : SizeInt;
    needle : unicodechar;
    cur    : punicodechar;
  begin
    Pos:=0;
    { widen once, outside of the loop }
    needle:=c;
    cur:=@s[1];
    idx:=1;
    while idx<=length(s) do
      begin
        if cur^=needle then
          begin
            Pos:=idx;
            exit;
          end;
        inc(cur);
        inc(idx);
      end;
  end;
  1106. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1107. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1108. {$define FPC_HAS_DELETE_UNICODESTR}
  Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  { Removes Size characters from S, starting at the 1-based position Index.
    Nothing happens when Index lies outside of S or Size is not positive;
    a Size reaching beyond the end of S is clipped to the end. }
  Var
    total : SizeInt;
  begin
    total:=Length(S);
    if (Index<=0) or (Index>total) or (Size<=0) then
      exit;
    UniqueString(S);
    { compare without computing Size+Index, which overflows for Size=MaxInt }
    if Size>total-Index then
      { everything from Index up to the end goes away: no data to move }
      Size:=total-Index+1
    else
      begin
        Dec(Index);
        { close the gap; the count includes the terminating null }
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(total-Index-Size+1)*sizeof(UnicodeChar));
      end;
    SetLength(S,total-Size);
  end;
  1127. {$endif FPC_HAS_DELETE_UNICODESTR}
  1128. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1129. {$define FPC_HAS_INSERT_UNICODESTR}
  Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  { Inserts Source into S in front of the 1-based position Index.
    Index is clamped to 1..Length(S)+1; an empty Source leaves S untouched. }
  var
    merged        : UnicodeString;
    oldlen,addlen : SizeInt;
  begin
    addlen:=Length(Source);
    if addlen=0 then
      exit;
    oldlen:=Length(S);
    if Index<=0 then
      Index:=1;
    if Index>oldlen then
      Index:=oldlen+1;
    { from here on Index is the 0-based insertion offset }
    Dec(Index);
    SetLength(merged,addlen+oldlen);
    { head of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(merged)^,Index*sizeof(UnicodeChar));
    { inserted text }
    Move(PUnicodeChar(Source)^,PUnicodeChar(merged)[Index],addlen*sizeof(UnicodeChar));
    { tail of S }
    if (oldlen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(merged)[addlen+Index],(oldlen-Index)*sizeof(UnicodeChar));
    S:=merged;
  end;
  1151. {$endif FPC_HAS_INSERT_UNICODESTR}
  1152. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1153. {$define FPC_HAS_UPCASE_UNICODECHAR}
  Function UpCase(c:UnicodeChar):UnicodeChar;
  { Returns the first character of what widestringmanager.UpperUnicodeStringProc
    yields for the one-character string made from c. }
  var
    single : UnicodeString;
  begin
    single:=c;
    UpCase:=widestringmanager.UpperUnicodeStringProc(single)[1];
  end;
  1161. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1162. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1163. {$define FPC_HAS_UPCASE_UNICODESTR}
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns s as processed by widestringmanager.UpperUnicodeStringProc. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  1168. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1169. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1170. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  { Returns the first character of what widestringmanager.LowerUnicodeStringProc
    yields for the one-character string made from c. }
  var
    single : UnicodeString;
  begin
    single:=c;
    LowerCase:=widestringmanager.LowerUnicodeStringProc(single)[1];
  end;
  1178. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1179. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1180. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  function LowerCase(const s : UnicodeString) : UnicodeString;
  { Returns s as processed by widestringmanager.LowerUnicodeStringProc. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(s);
  end;
  1185. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1186. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1187. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  { Gives S the length Len and, when Buf is not nil and Len is positive,
    fills it with the first Len characters found at Buf. }
  begin
    SetLength(S,Len);
    if (Buf=Nil) or (Len<=0) then
      exit;
    Move(Buf^,S[1],Len*sizeof(UnicodeChar));
  end;
  1194. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1195. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1196. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  { Converts the first Len bytes at Buf from DefaultSystemCodePage into S.
    When Buf is nil or Len is not positive, S merely gets the length Len. }
  begin
    if (Buf=Nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1204. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1205. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  { Val for floating point values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0. }
  Var
    narrow : ShortString;
  begin
    fpc_Val_Real_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        narrow:=ShortString(S);
        Val(narrow,fpc_Val_Real_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  1219. {$endif}
  1220. {$ifndef FPC_STR_ENUM_INTERN}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  { Val for enumeration values on a UnicodeString: s is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with code=256.

    The result is now initialised to 0 up front, as in the other Val
    helpers; before, it was left undefined for over-long strings.

    NOTE(review): str2ordindex is not used by this routine - confirm whether
    the conversion is meant to go through the enum name table. }
  var
    ss : ShortString;
  begin
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=ShortString(s);
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  1233. {$endif FPC_STR_ENUM_INTERN}
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  { Val for Currency values on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0. }
  Var
    narrow : ShortString;
  begin
    if length(S)<=255 then
      begin
        narrow:=ShortString(S);
        Val(narrow,fpc_Val_Currency_UnicodeStr,Code);
      end
    else
      begin
        fpc_Val_Currency_UnicodeStr:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  { Val for unsigned integers on a UnicodeString: S is converted to a
    ShortString and handed to Val. Strings longer than 255 characters are
    rejected with Code=256 and a result of 0. }
  Var
    narrow : ShortString;
  begin
    fpc_Val_UInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        narrow:=ShortString(S);
        Val(narrow,fpc_Val_UInt_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  { Val for signed integers on a UnicodeString: S is converted to a
    ShortString and handed, together with DestSize, to int_Val_SInt_ShortStr.
    Strings longer than 255 characters are rejected with Code=256 and a
    result of 0. }
  Var
    narrow : ShortString;
  begin
    fpc_Val_SInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        narrow:=ShortString(S);
        fpc_Val_SInt_UnicodeStr:=int_Val_SInt_ShortStr(DestSize,narrow,Code);
      end
    else
      Code:=256;
  end;
  1275. {$ifndef CPU64}
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  { Val for qword values on a UnicodeString: S is converted to a ShortString
    and handed to Val. Strings longer than 255 characters are rejected with
    Code=256 and a result of 0. }
  Var
    narrow : ShortString;
  begin
    fpc_Val_qword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        narrow:=ShortString(S);
        Val(narrow,fpc_Val_qword_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  { Val for Int64 values on a UnicodeString: S is converted to a ShortString
    and handed to Val. Strings longer than 255 characters are rejected with
    Code=256 and a result of 0. }
  Var
    narrow : ShortString;
  begin
    fpc_Val_int64_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        narrow:=ShortString(S);
        Val(narrow,fpc_Val_int64_UnicodeStr,Code);
      end
    else
      Code:=256;
  end;
  1302. {$endif CPU64}
  1303. {$ifndef FPUNONE}
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  { Str for floating point values with a UnicodeString destination: the text
    is produced by str_real in a ShortString and then converted. }
  var
    digits : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),digits);
    s:=UnicodeString(digits);
  end;
  1311. {$endif}
  1312. {$ifndef FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  { Str for enumeration values with a UnicodeString destination: the text is
    produced by fpc_shortstr_enum in a ShortString and then converted. }
  var
    name : ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,name);
    s:=UnicodeString(name);
  end;
  1320. {$endif FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  { Str for boolean values with a UnicodeString destination: the text is
    produced by fpc_shortstr_bool in a ShortString and then converted. }
  var
    name : ShortString;
  begin
    fpc_shortstr_bool(b,len,name);
    s:=UnicodeString(name);
  end;
  1328. {$ifdef FPC_HAS_STR_CURRENCY}
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  { Str for Currency values with a UnicodeString destination: the text is
    produced by Str(c:len:fr) in a ShortString and then converted. }
  var
    digits : shortstring;
  begin
    str(c:len:fr,digits);
    s:=UnicodeString(digits);
  end;
  1336. {$endif FPC_HAS_STR_CURRENCY}
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for signed integers with a UnicodeString destination: the text is
    produced by Str(v:Len) in a ShortString and then converted. }
  Var
    digits : ShortString;
  begin
    Str(v:Len,digits);
    S:=UnicodeString(digits);
  end;
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for unsigned integers with a UnicodeString destination: the text is
    produced by Str(v:Len) in a ShortString and then converted. }
  Var
    digits : ShortString;
  begin
    Str(v:Len,digits);
    S:=UnicodeString(digits);
  end;
  1351. {$ifndef CPU64}
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Int64 values with a UnicodeString destination: the text is
    produced by Str(v:Len) in a ShortString and then converted. }
  Var
    digits : ShortString;
  begin
    Str(v:Len,digits);
    S:=UnicodeString(digits);
  end;
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Qword values with a UnicodeString destination: the text is
    produced by Str(v:Len) in a ShortString and then converted. }
  Var
    digits : ShortString;
  begin
    Str(v:Len,digits);
    S:=UnicodeString(digits);
  end;
  1366. {$endif CPU64}
  function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Convenience form for null-terminated input: forwards to the four-argument
    UnicodeToUtf8 with Length(Source) as character count. Returns 0 when
    Source is nil. }
  begin
    if not assigned(Source) then
      Result:=0
    else
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source));
  end;
  function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  {
    Encodes SourceChars UTF-16 code units from Source as UTF-8.

    Dest         : destination buffer, or nil to only compute the size needed
    MaxDestBytes : capacity of Dest in bytes, terminating #0 included
    Result       : number of bytes stored (or needed, for Dest=nil) including
                   the terminating #0; 0 when Source is nil or when Dest is
                   assigned but MaxDestBytes is 0

    A high surrogate that is not followed by a low surrogate, and a low
    surrogate on its own, produce no output.

    Changes compared to the previous version:
     - MaxDestBytes=0 with an assigned Dest no longer stores a #0 at Dest[0]
       (MaxDestBytes-1 wrapped around, so the clamp never triggered).
     - When the last encoded character ends exactly at the end of the buffer,
       the terminator now replaces that whole character instead of only its
       last byte, so the output never ends in a cut-off multi-byte sequence.
  }
  var
    i,j,seqstart : SizeUInt;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        { a zero-sized buffer cannot even take the terminator }
        if MaxDestBytes=0 then
          exit;
        seqstart:=0;
        while (i<SourceChars) and (j<MaxDestBytes) do
          begin
            lw:=ord(Source[i]);
            { remember where the encoding of this character starts }
            seqstart:=j;
            case lw of
              0..$7f:
                begin
                  Dest[j]:=char(lw);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  if j+1>=MaxDestBytes then
                    break;
                  Dest[j]:=char($c0 or (lw shr 6));
                  Dest[j+1]:=char($80 or (lw and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  if j+2>=MaxDestBytes then
                    break;
                  Dest[j]:=char($e0 or (lw shr 12));
                  Dest[j+1]:=char($80 or ((lw shr 6) and $3f));
                  Dest[j+2]:=char($80 or (lw and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  if j+3>=MaxDestBytes then
                    break;
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { $d7c0 is ($d800 - ($10000 shr 10)) }
                      lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
                      Dest[j]:=char($f0 or (lw shr 18));
                      Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=char($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { The buffer is completely full: the character written last has to
          make room for the terminator. For a one-byte character seqstart
          equals MaxDestBytes-1, i.e. the position used before. }
        if j>SizeUInt(MaxDestBytes-1) then
          j:=seqstart;
        Dest[j]:=#0;
      end
    else
      begin
        { counting pass only: same classification, nothing is stored }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    { +1 accounts for the terminating #0 }
    result:=j+1;
  end;
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Convenience form for null-terminated input: forwards to the four-argument
    Utf8ToUnicode with length(Source) as byte count. Returns 0 when Source
    is nil. }
  begin
    if not assigned(Source) then
      Result:=0
    else
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source));
  end;
  1472. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1473. const
  1474. UNICODE_INVALID=63;
  1475. var
  1476. InputUTF8: SizeUInt;
  1477. IBYTE: BYTE;
  1478. OutputUnicode: SizeUInt;
  1479. PRECHAR: SizeUInt;
  1480. TempBYTE: BYTE;
  1481. CharLen: SizeUint;
  1482. LookAhead: SizeUInt;
  1483. UC: SizeUInt;
  1484. begin
  1485. if not assigned(Source) then
  1486. begin
  1487. result:=0;
  1488. exit;
  1489. end;
  1490. result:=SizeUInt(-1);
  1491. InputUTF8:=0;
  1492. OutputUnicode:=0;
  1493. PreChar:=0;
  1494. if Assigned(Dest) Then
  1495. begin
  1496. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1497. begin
  1498. IBYTE:=byte(Source[InputUTF8]);
  1499. if (IBYTE and $80) = 0 then
  1500. begin
  1501. //One character US-ASCII, convert it to unicode
  1502. if IBYTE = 10 then
  1503. begin
  1504. If (PreChar<>13) and FALSE then
  1505. begin
  1506. //Expand to crlf, conform UTF-8.
  1507. //This procedure will break the memory alocation by
  1508. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1509. if OutputUnicode+1<MaxDestChars then
  1510. begin
  1511. Dest[OutputUnicode]:=WideChar(13);
  1512. inc(OutputUnicode);
  1513. Dest[OutputUnicode]:=WideChar(10);
  1514. inc(OutputUnicode);
  1515. PreChar:=10;
  1516. end
  1517. else
  1518. begin
  1519. Dest[OutputUnicode]:=WideChar(13);
  1520. inc(OutputUnicode);
  1521. end;
  1522. end
  1523. else
  1524. begin
  1525. Dest[OutputUnicode]:=WideChar(IBYTE);
  1526. inc(OutputUnicode);
  1527. PreChar:=IBYTE;
  1528. end;
  1529. end
  1530. else
  1531. begin
  1532. Dest[OutputUnicode]:=WideChar(IBYTE);
  1533. inc(OutputUnicode);
  1534. PreChar:=IBYTE;
  1535. end;
  1536. inc(InputUTF8);
  1537. end
  1538. else
  1539. begin
  1540. TempByte:=IBYTE;
  1541. CharLen:=0;
  1542. while (TempBYTE and $80)<>0 do
  1543. begin
  1544. TempBYTE:=(TempBYTE shl 1) and $FE;
  1545. inc(CharLen);
  1546. end;
  1547. //Test for the "CharLen" conforms UTF-8 string
  1548. //This means the 10xxxxxx pattern.
  1549. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1550. begin
  1551. //Insuficient chars in string to decode
  1552. //UTF-8 array. Fallback to single char.
  1553. CharLen:= 1;
  1554. end;
  1555. for LookAhead := 1 to CharLen-1 do
  1556. begin
  1557. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1558. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1559. begin
  1560. //Invalid UTF-8 sequence, fallback.
  1561. CharLen:= LookAhead;
  1562. break;
  1563. end;
  1564. end;
  1565. UC:=$FFFF;
  1566. case CharLen of
  1567. 1: begin
  1568. //Not valid UTF-8 sequence
  1569. UC:=UNICODE_INVALID;
  1570. end;
  1571. 2: begin
  1572. //Two bytes UTF, convert it
  1573. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1574. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1575. if UC <= $7F then
  1576. begin
  1577. //Invalid UTF sequence.
  1578. UC:=UNICODE_INVALID;
  1579. end;
  1580. end;
  1581. 3: begin
  1582. //Three bytes, convert it to unicode
  1583. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1584. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1585. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1586. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1587. begin
  1588. //Invalid UTF-8 sequence
  1589. UC:= UNICODE_INVALID;
  1590. End;
  1591. end;
  1592. 4: begin
  1593. //Four bytes, convert it to two unicode characters
  1594. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1595. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1596. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1597. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1598. if (UC < $10000) or (UC > $10FFFF) then
  1599. begin
  1600. UC:= UNICODE_INVALID;
  1601. end
  1602. else
  1603. begin
  1604. { only store pair if room }
  1605. dec(UC,$10000);
  1606. if (OutputUnicode<MaxDestChars-1) then
  1607. begin
  1608. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1609. inc(OutputUnicode);
  1610. UC:=(UC and $3ff) + $DC00;
  1611. end
  1612. else
  1613. begin
  1614. InputUTF8:= InputUTF8 + CharLen;
  1615. { don't store anything }
  1616. CharLen:=0;
  1617. end;
  1618. end;
  1619. end;
  1620. 5,6,7: begin
  1621. //Invalid UTF8 to unicode conversion,
  1622. //mask it as invalid UNICODE too.
  1623. UC:=UNICODE_INVALID;
  1624. end;
  1625. end;
  1626. if CharLen > 0 then
  1627. begin
  1628. PreChar:=UC;
  1629. Dest[OutputUnicode]:=WideChar(UC);
  1630. inc(OutputUnicode);
  1631. end;
  1632. InputUTF8:= InputUTF8 + CharLen;
  1633. end;
  1634. end;
  1635. Result:=OutputUnicode+1;
  1636. end
  1637. else
  1638. begin
  1639. while (InputUTF8<SourceBytes) do
  1640. begin
  1641. IBYTE:=byte(Source[InputUTF8]);
  1642. if (IBYTE and $80) = 0 then
  1643. begin
  1644. //One character US-ASCII, convert it to unicode
  1645. if IBYTE = 10 then
  1646. begin
  1647. if (PreChar<>13) and FALSE then
  1648. begin
  1649. //Expand to crlf, conform UTF-8.
  1650. //This procedure will break the memory alocation by
  1651. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1652. inc(OutputUnicode,2);
  1653. PreChar:=10;
  1654. end
  1655. else
  1656. begin
  1657. inc(OutputUnicode);
  1658. PreChar:=IBYTE;
  1659. end;
  1660. end
  1661. else
  1662. begin
  1663. inc(OutputUnicode);
  1664. PreChar:=IBYTE;
  1665. end;
  1666. inc(InputUTF8);
  1667. end
  1668. else
  1669. begin
  1670. TempByte:=IBYTE;
  1671. CharLen:=0;
  1672. while (TempBYTE and $80)<>0 do
  1673. begin
  1674. TempBYTE:=(TempBYTE shl 1) and $FE;
  1675. inc(CharLen);
  1676. end;
  1677. //Test for the "CharLen" conforms UTF-8 string
  1678. //This means the 10xxxxxx pattern.
  1679. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1680. begin
  1681. //Insuficient chars in string to decode
  1682. //UTF-8 array. Fallback to single char.
  1683. CharLen:= 1;
  1684. end;
  1685. for LookAhead := 1 to CharLen-1 do
  1686. begin
  1687. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1688. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1689. begin
  1690. //Invalid UTF-8 sequence, fallback.
  1691. CharLen:= LookAhead;
  1692. break;
  1693. end;
  1694. end;
  1695. UC:=$FFFF;
  1696. case CharLen of
  1697. 1: begin
  1698. //Not valid UTF-8 sequence
  1699. UC:=UNICODE_INVALID;
  1700. end;
  1701. 2: begin
  1702. //Two bytes UTF, convert it
  1703. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1704. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1705. if UC <= $7F then
  1706. begin
  1707. //Invalid UTF sequence.
  1708. UC:=UNICODE_INVALID;
  1709. end;
  1710. end;
  1711. 3: begin
  1712. //Three bytes, convert it to unicode
  1713. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1714. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1715. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1716. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1717. begin
  1718. //Invalid UTF-8 sequence
  1719. UC:= UNICODE_INVALID;
  1720. end;
  1721. end;
  1722. 4: begin
  1723. //Four bytes, convert it to two unicode characters
  1724. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1725. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1726. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1727. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1728. if (UC < $10000) or (UC > $10FFFF) then
  1729. UC:= UNICODE_INVALID
  1730. else
  1731. { extra character character }
  1732. inc(OutputUnicode);
  1733. end;
  1734. 5,6,7: begin
  1735. //Invalid UTF8 to unicode conversion,
  1736. //mask it as invalid UNICODE too.
  1737. UC:=UNICODE_INVALID;
  1738. end;
  1739. end;
  1740. if CharLen > 0 then
  1741. begin
  1742. PreChar:=UC;
  1743. inc(OutputUnicode);
  1744. end;
  1745. InputUTF8:= InputUTF8 + CharLen;
  1746. end;
  1747. end;
  1748. Result:=OutputUnicode+1;
  1749. end;
  1750. end;
  1751. function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  1752. begin
  1753. Result:=UTF8Encode(UnicodeString(s));
  1754. end;
  1755. {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1756. {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1757. function UTF8Encode(const s : UnicodeString) : RawByteString;
  1758. var
  1759. i : SizeInt;
  1760. hs : UTF8String;
  1761. begin
  1762. result:='';
  1763. if s='' then
  1764. exit;
  1765. SetLength(hs,length(s)*3);
  1766. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  1767. if i>0 then
  1768. begin
  1769. SetLength(hs,i-1);
  1770. result:=hs;
  1771. end;
  1772. end;
  1773. {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1774. {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  1775. {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  1776. function UTF8Decode(const s : RawByteString): UnicodeString;
  1777. var
  1778. i : SizeInt;
  1779. hs : UnicodeString;
  1780. begin
  1781. result:='';
  1782. if s='' then
  1783. exit;
  1784. SetLength(hs,length(s));
  1785. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  1786. if i>0 then
  1787. begin
  1788. SetLength(hs,i-1);
  1789. result:=hs;
  1790. end;
  1791. end;
  1792. {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  1793. function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1794. begin
  1795. Result:=Utf8Encode(s);
  1796. end;
  1797. function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1798. begin
  1799. Result:=RawByteString(Utf8Decode(s));
  1800. end;
  1801. procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
  1802. var
  1803. i, reslen: sizeint;
  1804. w: longint;
  1805. begin
  1806. reslen:=0;
  1807. i:=0;
  1808. { calculate required length }
  1809. while (i<len) do
  1810. begin
  1811. if (p[i]<=#$d7ff) or (p[i]>=#$e000) then
  1812. inc(i)
  1813. else if (p[i]<=#$dbff) and
  1814. (i+1<len) and
  1815. (p[i+1]>=#$dc00) and
  1816. (p[i+1]<=#$dfff) then
  1817. inc(i,2)
  1818. else
  1819. inc(i);
  1820. inc(reslen);
  1821. end;
  1822. SetLength(res,reslen+1); { +1 for null termination }
  1823. reslen:=0;
  1824. i:=0;
  1825. { do conversion }
  1826. while (i<len) do
  1827. begin
  1828. w:=ord(p[i]);
  1829. if (w<=$d7ff) or (w>=$e000) then
  1830. res[reslen]:=w
  1831. else if (w<=$dbff) and
  1832. (i+1<len) and
  1833. (p[i+1]>=#$dc00) and
  1834. (p[i+1]<=#$dfff) then
  1835. begin
  1836. res[reslen]:=(UCS4Char(w-$d7c0) shl 10)+(UCS4Char(p[i+1]) xor $dc00);
  1837. inc(i);
  1838. end
  1839. else { invalid surrogate pair }
  1840. res[reslen]:=w;
  1841. inc(i);
  1842. inc(reslen);
  1843. end;
  1844. res[reslen]:=0;
  1845. end;
  1846. {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1847. {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1848. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  1849. begin
  1850. UCS4Encode(PWideChar(s),Length(s),result);
  1851. end;
  1852. {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1853. {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  1854. {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  1855. function WideStringToUCS4String(const s : WideString) : UCS4String;
  1856. begin
  1857. UCS4Encode(PWideChar(s),Length(s),result);
  1858. end;
  1859. {$endif FPC_HAS_WIDESTR_TO_UCS4STRING}
  1860. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1861. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  1862. { dest should point to previously allocated wide/unicodestring }
  1863. procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
  1864. var
  1865. i: sizeint;
  1866. nc: UCS4Char;
  1867. begin
  1868. for i:=0 to length(s)-2 do { -2 because s contains explicit terminating #0 }
  1869. begin
  1870. nc:=s[i];
  1871. if (nc<$ffff) then
  1872. dest^:=widechar(nc)
  1873. else if (dword(nc)<=$10ffff) then
  1874. begin
  1875. dest^:=widechar(nc shr 10 + $d7c0);
  1876. { subtracting $10000 doesn't change low 10 bits }
  1877. dest[1]:=widechar(nc and $3ff + $dc00);
  1878. inc(dest);
  1879. end
  1880. else { invalid code point }
  1881. dest^:='?';
  1882. inc(dest);
  1883. end;
  1884. end;
  1885. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  1886. var
  1887. i : SizeInt;
  1888. reslen : SizeInt;
  1889. begin
  1890. reslen:=0;
  1891. for i:=0 to length(s)-2 do { skip terminating #0 }
  1892. Inc(reslen,1+ord((s[i]>$ffff) and (s[i]<=$10ffff)));
  1893. SetLength(result,reslen);
  1894. UCS4Decode(s,pointer(result));
  1895. end;
  1896. function UCS4StringToWideString(const s : UCS4String) : WideString;
  1897. var
  1898. i : SizeInt;
  1899. reslen : SizeInt;
  1900. begin
  1901. reslen:=0;
  1902. for i:=0 to length(s)-2 do { skip terminating #0 }
  1903. Inc(reslen,1+ord((s[i]>$ffff) and (s[i]<=$10ffff)));
  1904. SetLength(result,reslen);
  1905. UCS4Decode(s,pointer(result));
  1906. end;
  1907. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1908. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  1909. const
  1910. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  1911. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  1912. procedure unimplementedunicodestring;
  1913. begin
  1914. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  1915. If IsConsole then
  1916. begin
  1917. Writeln(StdErr,SNoUnicodestrings);
  1918. Writeln(StdErr,SRecompileWithUnicodestrings);
  1919. end;
  1920. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  1921. HandleErrorFrame(233,get_frame);
  1922. end;
  1923. function StringElementSize(const S: UnicodeString): Word; overload;
  1924. begin
  1925. if assigned(Pointer(S)) then
  1926. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  1927. else
  1928. Result:=SizeOf(UnicodeChar);
  1929. end;
  1930. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  1931. begin
  1932. if assigned(Pointer(S)) then
  1933. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  1934. else
  1935. Result:=0;
  1936. end;
  1937. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  1938. begin
  1939. {$ifdef FPC_HAS_CPSTRING}
  1940. if assigned(Pointer(S)) then
  1941. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  1942. else
  1943. {$endif FPC_HAS_CPSTRING}
  1944. Result:=DefaultUnicodeCodePage;
  1945. end;
  1946. {$warnings off}
  1947. function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
  1948. begin
  1949. unimplementedunicodestring;
  1950. end;
  1951. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  1952. begin
  1953. unimplementedunicodestring;
  1954. end;
  1955. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  1956. begin
  1957. unimplementedunicodestring;
  1958. end;
  1959. {$warnings on}
  1960. procedure initunicodestringmanager;
  1961. begin
  1962. {$ifndef HAS_WIDESTRINGMANAGER}
  1963. widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  1964. widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  1965. widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
  1966. widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  1967. {$endif HAS_WIDESTRINGMANAGER}
  1968. widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
  1969. widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  1970. {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  1971. {$ifndef HAS_WIDESTRINGMANAGER}
  1972. widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
  1973. widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  1974. widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
  1975. widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
  1976. {$endif HAS_WIDESTRINGMANAGER}
  1977. widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
  1978. widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
  1979. widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  1980. widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  1981. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  1982. widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
  1983. end;
  1984. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}