ustrings.inc 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  13. {$define FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to (S= SizeOf(SizeInt), R= (if CPU64 then SizeOf(Longint) else SizeOf(SizeInt))):
  19. @-S-R : Reference count (R bytes)
  20. @-S : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
Type
  PUnicodeRec = ^TUnicodeRec;
  { Header record that precedes the character data of a UnicodeString.
    The routines in this file reach it by subtracting UnicodeFirstOff from
    the string pointer. }
  TUnicodeRec = Record
    CodePage : TSystemCodePage; { NewUnicodeString stores DefaultUnicodeCodePage here }
    ElementSize : Word; { NewUnicodeString stores SizeOf(UnicodeChar) here }
{$if not defined(VER3_0) and not defined(VER3_2)}
{$ifdef CPU64}
    Ref : Longint; { reference count; the ref-counting helpers leave negative values untouched (constant strings) }
{$else}
    Ref : SizeInt; { reference count; the ref-counting helpers leave negative values untouched (constant strings) }
{$endif}
{$else}
{$ifdef CPU64}
    { align fields }
    Dummy : DWord;
{$endif CPU64}
    Ref : SizeInt; { reference count; the ref-counting helpers leave negative values untouched (constant strings) }
{$endif}
    Len : SizeInt; { number of UnicodeChars, not counting the terminating #0 }
  end;

Const
  { Size of the header, i.e. the distance between the start of the
    allocation and the first character of the string. }
  UnicodeFirstOff = SizeOf(TUnicodeRec);
  50. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
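{
  The default UnicodeChar <-> AnsiChar conversion is a plain one-to-one
  mapping of code values: DefaultUnicode2AnsiMove copies every UnicodeChar
  whose value is below 256 and translates all others to '?', while
  DefaultAnsi2UnicodeMove widens every byte unchanged.
  These routines can be overridden for the current locale.
}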
  56. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  57. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  58. procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  59. var
  60. i : SizeInt;
  61. p : PAnsiChar;
  62. begin
  63. setlength(dest,len);
  64. if not assigned(pointer(dest)) then
  65. exit;
  66. SetCodePage(dest,cp,false);
  67. p:=pointer(dest); {SetLength guarantees that dest is unique}
  68. for i:=1 to len do
  69. begin
  70. if word(source^)<256 then
  71. p^:=AnsiChar(word(source^))
  72. else
  73. p^:='?';
  74. inc(source);
  75. inc(p);
  76. end;
  77. end;
  78. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  79. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  80. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  81. procedure DefaultAnsi2UnicodeMove(source:pansichar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  82. var
  83. i : SizeInt;
  84. p : PUnicodeChar;
  85. begin
  86. setlength(dest,len);
  87. p:=pointer(dest); {SetLength guarantees that dest is unique}
  88. for i:=1 to len do
  89. begin
  90. p^:=unicodechar(byte(source^));
  91. inc(source);
  92. inc(p);
  93. end;
  94. end;
  95. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  96. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  97. function DefaultCharLengthPChar(const Str: PAnsiChar): PtrInt;
  98. begin
  99. DefaultCharLengthPChar:=length(Str);
  100. end;
  101. function DefaultCodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
  102. begin
  103. if str[0]<>#0 then
  104. DefaultCodePointLength:=1
  105. else
  106. DefaultCodePointLength:=0;
  107. end;
  108. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  109. function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  110. begin
  111. { don't raise an exception here. We need this for text file handling }
  112. if stdcp<>scpFileSystemSingleByte then
  113. Result:=DefaultSystemCodePage
  114. else
  115. { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
  116. without a fully functional widestring manager that will probably cause
  117. more problems that it solves }
  118. Result:=DefaultFileSystemCodePage
  119. end;
  120. Procedure GetUnicodeStringManager (Out Manager : TUnicodeStringManager);
  121. begin
  122. manager:=widestringmanager;
  123. end;
  124. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Out Old: TUnicodeStringManager);
  125. begin
  126. Old:=widestringmanager;
  127. widestringmanager:=New;
  128. end;
  129. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  130. begin
  131. widestringmanager:=New;
  132. end;
  133. Procedure GetWideStringManager (out Manager : TUnicodeStringManager);
  134. begin
  135. manager:=widestringmanager;
  136. end;
  137. Procedure SetWideStringManager (Const New : TUnicodeStringManager; Out old: TUnicodeStringManager);
  138. begin
  139. Old:=widestringmanager;
  140. widestringmanager:=New;
  141. end;
  142. Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  143. begin
  144. widestringmanager:=New;
  145. end;
  146. {****************************************************************************
  147. Internal functions, not in interface.
  148. ****************************************************************************}
  149. procedure UnicodeStringError;
  150. begin
  151. HandleErrorAddrFrameInd(204,get_pc_addr,get_frame);
  152. end;
  153. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  154. {$define FPC_HAS_NEW_UNICODESTRING}
  155. Function NewUnicodeString(Len : SizeInt) : Pointer;
  156. {
  157. Allocate a new UnicodeString on the heap.
  158. initialize it to zero length and reference count 1.
  159. }
  160. Var
  161. P : Pointer;
  162. begin
  163. GetMem(P,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
  164. If P<>Nil then
  165. begin
  166. PUnicodeRec(P)^.Len:=Len; { Initial length }
  167. PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
  168. PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
  169. PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);
  170. inc(p,UnicodeFirstOff); { Points to string now }
  171. PUnicodeChar(P)^:=#0; { Terminating #0 }
  172. end
  173. else
  174. UnicodeStringError;
  175. NewUnicodeString:=P;
  176. end;
  177. {$endif FPC_HAS_NEW_UNICODESTRING}
  178. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  179. {$define FPC_HAS_UNICODESTR_DECR_REF}
  180. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  181. {
  182. Decreases the ReferenceCount of a non constant unicodestring;
  183. If the reference count is zero, deallocate the string;
  184. }
  185. Var
  186. p: PUnicodeRec;
  187. Begin
  188. { Zero string }
  189. if S=Nil then
  190. exit;
  191. { check for constant strings ...}
  192. p:=PUnicodeRec(S-UnicodeFirstOff);
  193. S:=nil;
  194. if p^.Ref<0 then
  195. exit;
  196. { declocked does a MT safe dec and returns true, if the counter is 0 }
  197. if declocked(p^.Ref) then
  198. FreeMem(p);
  199. end;
  200. { alias for internal use }
  201. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  202. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  203. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  204. {$define FPC_HAS_UNICODESTR_INCR_REF}
  205. Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  206. Begin
  207. If S=Nil then
  208. exit;
  209. { constant string ? }
  210. If PUnicodeRec(S-UnicodeFirstOff)^.Ref<0 then
  211. exit;
  212. inclocked(PUnicodeRec(S-UnicodeFirstOff)^.Ref);
  213. end;
  214. { alias for internal use }
  215. Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  216. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  217. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  218. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  219. procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  220. {
  221. Converts a UnicodeString to a ShortString;
  222. }
  223. Var
  224. Size : SizeInt;
  225. temp : ansistring;
  226. begin
  227. res:='';
  228. Size:=Length(S2);
  229. if Size>0 then
  230. begin
  231. If Size>high(res) then
  232. Size:=high(res);
  233. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,DefaultSystemCodePage,Size);
  234. res:=temp;
  235. end;
  236. end;
  237. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  238. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  239. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  240. Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  241. {
  242. Converts a ShortString to a UnicodeString;
  243. }
  244. Var
  245. Size : SizeInt;
  246. begin
  247. result:='';
  248. Size:=Length(S2);
  249. if Size>0 then
  250. widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(@S2[1]),DefaultSystemCodePage,result,Size);
  251. end;
  252. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  253. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  254. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  255. Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  256. {
  257. Converts a UnicodeString to an AnsiString
  258. }
  259. Var
  260. Size : SizeInt;
  261. {$ifndef FPC_HAS_CPSTRING}
  262. cp : TSystemCodePage;
  263. {$endif FPC_HAS_CPSTRING}
  264. begin
  265. {$ifndef FPC_HAS_CPSTRING}
  266. cp:=DefaultSystemCodePage;
  267. {$endif FPC_HAS_CPSTRING}
  268. result:='';
  269. Size:=Length(S2);
  270. if Size>0 then
  271. begin
  272. cp:=TranslatePlaceholderCP(cp);
  273. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,cp,Size);
  274. end;
  275. end;
  276. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  277. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  278. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  279. Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  280. {
  281. Converts an AnsiString to a UnicodeString;
  282. }
  283. Var
  284. Size : SizeInt;
  285. cp: TSystemCodePage;
  286. begin
  287. result:='';
  288. Size:=Length(S2);
  289. if Size>0 then
  290. begin
  291. cp:=TranslatePlaceholderCP(StringCodePage(S2));
  292. widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(S2),cp,result,Size);
  293. end;
  294. end;
  295. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  296. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  297. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  298. Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  299. begin
  300. SetLength(Result,Length(S2));
  301. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  302. end;
  303. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  304. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  305. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  306. Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  307. begin
  308. SetLength(Result,Length(S2));
  309. Move(pointer(S2)^,Pointer(Result)^,Length(S2)*sizeof(WideChar));
  310. end;
  311. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  312. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  313. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  314. Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  315. var
  316. Size : SizeInt;
  317. begin
  318. result:='';
  319. if p=nil then
  320. exit;
  321. Size := IndexWord(p^, -1, 0);
  322. Setlength(result,Size);
  323. if Size>0 then
  324. Move(p^,PUnicodeChar(Pointer(result))^,Size*sizeof(UnicodeChar));
  325. end;
  326. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  327. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  328. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  329. Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  330. var
  331. Size : SizeInt;
  332. {$ifndef FPC_HAS_CPSTRING}
  333. cp : TSystemCodePage;
  334. {$endif FPC_HAS_CPSTRING}
  335. begin
  336. {$ifndef FPC_HAS_CPSTRING}
  337. cp:=DefaultSystemCodePage;
  338. {$endif FPC_HAS_CPSTRING}
  339. result:='';
  340. if p=nil then
  341. exit;
  342. Size := IndexWord(p^, -1, 0);
  343. if Size>0 then
  344. begin
  345. cp:=TranslatePlaceholderCP(cp);
  346. widestringmanager.Wide2AnsiMoveProc(P,result,cp,Size);
  347. end;
  348. end;
  349. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  350. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  351. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  352. procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  353. var
  354. Size : SizeInt;
  355. temp: ansistring;
  356. begin
  357. res:='';
  358. if p=nil then
  359. exit;
  360. Size:=IndexWord(p^, high(PtrInt), 0);
  361. if Size>0 then
  362. begin
  363. widestringmanager.Wide2AnsiMoveProc(p,temp,DefaultSystemCodePage,Size);
  364. res:=temp;
  365. end;
  366. end;
  367. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  368. {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  369. {$define FPC_UNICODESTR_ASSIGN}
  370. { checked against the ansistring routine, 2001-05-27 (FK) }
  371. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  372. {
  373. Assigns S2 to S1 (S1:=S2), taking in account reference counts.
  374. }
  375. begin
  376. If S2<>nil then
  377. If PUnicodeRec(S2-UnicodeFirstOff)^.Ref>0 then
  378. inclocked(PUnicodeRec(S2-UnicodeFirstOff)^.ref);
  379. { Decrease the reference count on the old S1 }
  380. fpc_unicodestr_decr_ref (S1);
  381. s1:=s2;
  382. end;
  383. { alias for internal use }
  384. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  385. {$endif FPC_UNICODESTR_ASSIGN}
  386. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  387. {$define FPC_HAS_UNICODESTR_CONCAT}
{
  Stores the concatenation S1+S2 in DestS.
  DestS may be the same string as S1 and/or S2. When DestS is the sole owner
  of its buffer (reference count 1) the buffer is reallocated in place,
  otherwise a new string is allocated and the old DestS is released.
}
procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
Var
  S1Len,S2Len : SizeInt;
  OldDestP,NewDestP,RealDestP,Src : Pointer;
begin
  { only assign if s1 or s2 is empty }
  if Length(S1)=0 then
    begin
      DestS:=s2;
      exit;
    end;
  if Length(S2)=0 then
    begin
      DestS:=s1;
      exit;
    end;
  { both strings are non-empty here, so their headers can be read directly }
  S1Len:=PUnicodeRec(Pointer(S1)-UnicodeFirstOff)^.Len;
  S2Len:=PUnicodeRec(Pointer(S2)-UnicodeFirstOff)^.Len;
  OldDestP:=Pointer(DestS);
  { Reallocate when possible; in the hope this will reuse the chunk more often than do a redundant copy. }
  if Assigned(OldDestP) and (PUnicodeRec(OldDestP-UnicodeFirstOff)^.Ref=1) then
    begin
      { ReallocMem works on the start of the allocation, i.e. the header }
      RealDestP:=OldDestP-UnicodeFirstOff;
      NewDestP:=ReallocMem(RealDestP,UnicodeFirstOff+sizeof(UnicodeChar)+(S1Len+S2Len)*sizeof(UnicodeChar))+UnicodeFirstOff;
      { Copy S2 first, as in the case of OldDestP = Pointer(S2) it must be copied first and in other cases the order does not matter. }
      Src:=Pointer(S2);
      { S2 was DestS itself: its characters now live in the reallocated block }
      if Src=OldDestP then
        Src:=NewDestP;
      Move(Src^,PUnicodeChar(NewDestP)[S1Len],S2Len*sizeof(UnicodeChar));
      if OldDestP<>Pointer(S1) then { Not an append, need to copy S1? }
        Move(Pointer(S1)^,NewDestP^,S1Len*sizeof(UnicodeChar));
    end
  else
    begin
      { DestS is empty or not exclusively owned here: build the result in a
        fresh string and drop the reference to the old DestS afterwards }
      NewDestP:=NewUnicodeString(S1Len+S2Len);
      Move(Pointer(S1)^,NewDestP^,S1Len*sizeof(UnicodeChar));
      Move(Pointer(S2)^,PUnicodeChar(NewDestP)[S1Len],S2Len*sizeof(UnicodeChar));
      fpc_unicodestr_decr_ref(Pointer(DestS));
    end;
  { terminate the result and store its final length in the header }
  PUnicodeChar(NewDestP)[S1Len+S2Len]:=#0;
  PUnicodeRec(NewDestP-UnicodeFirstOff)^.Len:=S1Len+S2Len;
  Pointer(DestS):=NewDestP;
end;
  431. {$endif FPC_HAS_UNICODESTR_CONCAT}
  432. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  433. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
{
  Stores the concatenation of all strings in sarr in DestS.
  DestS itself may occur among the elements of sarr. When DestS is the sole
  owner of its buffer (reference count 1) the buffer is reallocated in
  place, otherwise a new string is allocated and the old DestS is released.
}
procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
Var
  lowstart,i,Size,NewLen : SizeInt;
  p,pc,OldDestP,NewDestP,RealDestP : pointer;
begin
  lowstart:=low(sarr);
  { skip empty strings }
  while (lowstart<=high(sarr)) and (sarr[lowstart]='') do
    inc(lowstart);
  if lowstart>high(sarr) then
    begin
      DestS:=''; { All source strings empty }
      exit;
    end;
  { Calculate size of the result so we can do
    a single call to SetLength() }
  NewLen:=0;
  for i:=lowstart to high(sarr) do
    inc(NewLen,length(sarr[i]));
  { In the case of the only nonempty string, return it directly. }
  if NewLen=PUnicodeRec(Pointer(sarr[lowstart])-UnicodeFirstOff)^.Len then
    begin
      DestS:=sarr[lowstart];
      exit;
    end;
  OldDestP:=Pointer(DestS);
  { Reallocate when possible; in the hope this will reuse the chunk more often than do a redundant copy. }
  if Assigned(OldDestP) and (PUnicodeRec(OldDestP-UnicodeFirstOff)^.Ref=1) then
    begin
      { ReallocMem works on the start of the allocation, i.e. the header }
      RealDestP:=OldDestP-UnicodeFirstOff;
      NewDestP:=ReallocMem(RealDestP,UnicodeFirstOff+sizeof(UnicodeChar)+NewLen*sizeof(UnicodeChar))+UnicodeFirstOff;
      { First string can be skipped if appending. }
      if OldDestP=Pointer(sarr[lowstart]) then
        inc(lowstart);
    end
  else
    begin
      { Create new string. }
      OldDestP:=nil; { This case is distinguished as "not assigned(olddestp)". Also prevents "if p=olddestp" in the loop below shared with the ReallocMem branch. }
      NewDestP:=NewUnicodeString(NewLen);
    end;
  { Copy strings from last to the first, so that possible occurrences of DestS could read from the beginning of the reallocated DestS. }
  pc:=NewDestP+NewLen*sizeof(UnicodeChar); { write cursor, starts just past the last character }
  for i:=high(sarr) downto lowstart do
    begin
      p:=Pointer(sarr[i]);
      { empty strings contribute nothing }
      if not Assigned(p) then
        continue;
      if p=OldDestP then
        { DestS occurred among pieces in the ReallocMem case! Use the new pointer. Its header still conveniently contains old DestS length. }
        p:=NewDestP;
      Size:=PUnicodeRec(p-UnicodeFirstOff)^.Len*sizeof(UnicodeChar);
      dec(pc,size);
      Move(p^,pc^,Size);
    end;
  { OldDestP is nil only in the new-string branch: release the previous DestS there }
  if not assigned(OldDestP) then
    fpc_UnicodeStr_Decr_Ref(Pointer(DestS));
  PUnicodeChar(NewDestP)[NewLen]:=#0;
  PUnicodeRec(NewDestP-UnicodeFirstOff)^.Len:=NewLen; { Careful, loop above relies on the old Len in the NewDestP header. }
  Pointer(DestS):=NewDestP;
end;
  495. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  496. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  497. {$define FPC_HAS_CHAR_TO_UCHAR}
  498. Function fpc_Char_To_UChar(const c : AnsiChar): UnicodeChar; compilerproc;
  499. var
  500. w: unicodestring;
  501. begin
  502. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
  503. fpc_Char_To_UChar:=w[1];
  504. end;
  505. {$endif FPC_HAS_CHAR_TO_UCHAR}
  506. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  507. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  508. Function fpc_Char_To_UnicodeStr(const c : AnsiChar): UnicodeString; compilerproc;
  509. {
  510. Converts a AnsiChar to a UnicodeString;
  511. }
  512. begin
  513. widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,result,1);
  514. end;
  515. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  516. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  517. {$define FPC_HAS_UCHAR_TO_CHAR}
  518. Function fpc_UChar_To_Char(const c : UnicodeChar): AnsiChar; compilerproc;
  519. {
  520. Converts a UnicodeChar to a AnsiChar;
  521. }
  522. var
  523. s: ansistring;
  524. begin
  525. widestringmanager.Unicode2AnsiMoveProc(@c, s, DefaultSystemCodePage, 1);
  526. if length(s)=1 then
  527. fpc_UChar_To_Char:= s[1]
  528. else
  529. fpc_UChar_To_Char:='?';
  530. end;
  531. {$endif FPC_HAS_UCHAR_TO_CHAR}
  532. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  533. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  534. function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  535. {
  536. Converts a WideChar to a ShortString;
  537. }
  538. var
  539. s: ansistring;
  540. begin
  541. widestringmanager.Wide2AnsiMoveProc(@c,s,DefaultSystemCodePage,1);
  542. result:=s;
  543. end;
  544. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  545. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  546. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  547. Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  548. {
  549. Converts a UnicodeChar to a UnicodeString;
  550. }
  551. begin
  552. Setlength (fpc_UChar_To_UnicodeStr,1);
  553. fpc_UChar_To_UnicodeStr[1]:= c;
  554. end;
  555. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  556. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  557. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  558. Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  559. {
  560. Converts a UnicodeChar to a AnsiString;
  561. }
  562. {$ifndef FPC_HAS_CPSTRING}
  563. var
  564. cp : TSystemCodePage;
  565. {$endif FPC_HAS_CPSTRING}
  566. begin
  567. {$ifndef FPC_HAS_CPSTRING}
  568. cp:=DefaultSystemCodePage;
  569. {$endif FPC_HAS_CPSTRING}
  570. cp:=TranslatePlaceholderCP(cp);
  571. widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
  572. end;
  573. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  574. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  575. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  576. Function fpc_PChar_To_UnicodeStr(const p : PAnsiChar): UnicodeString; compilerproc;
  577. Var
  578. L : SizeInt;
  579. begin
  580. if (not assigned(p)) or (p[0]=#0) Then
  581. begin
  582. fpc_pchar_to_unicodestr := '';
  583. exit;
  584. end;
  585. l:=IndexChar(p^,-1,#0);
  586. widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_UnicodeStr,l);
  587. end;
  588. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  589. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  590. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  591. Function fpc_CharArray_To_UnicodeStr(const arr: array of ansichar; zerobased: boolean = true): UnicodeString; compilerproc;
  592. var
  593. i : SizeInt;
  594. begin
  595. if zerobased then
  596. begin
  597. if arr[0]=#0 Then
  598. begin
  599. fpc_chararray_to_unicodestr:='';
  600. exit;
  601. end;
  602. i:=IndexChar(arr,high(arr)+1,#0);
  603. if i=-1 then
  604. i:=high(arr)+1;
  605. end
  606. else
  607. i:=high(arr)+1;
  608. widestringmanager.Ansi2UnicodeMoveProc(pansichar(@arr),DefaultSystemCodePage,fpc_CharArray_To_UnicodeStr,i);
  609. end;
  610. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  611. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  612. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies an array of WideChar into a UnicodeString.
    With zerobased set the copy stops in front of the first zero word in
    arr; otherwise, or when no zero word is present, the whole array is
    copied.
  }
  var
    CharCount : SizeInt;
  begin
    CharCount:=-1;
    if zerobased then
      CharCount:=IndexWord(arr,high(arr)+1,0);
    { -1 means "no terminator looked for / found": use the full array }
    if CharCount=-1 then
      CharCount:=high(arr)+1;
    SetLength(Result,CharCount);
    Move(arr[0],Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  628. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  629. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  630. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  631. { Due to their names, the following procedures should be in wstrings.inc;
  632. however, the compiler generates code using these functions on all platforms. }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of WideChar to a shortstring using
    DefaultSystemCodePage.
    At most high(res) wide chars of arr are considered. With zerobased set
    the conversion additionally stops in front of the first zero word
    inside that range.
  }
  var
    Limit     : longint;
    ZeroAt    : ptrint;
    CharCount : byte;
    Converted : ansistring;
  begin
    { clamp the number of source chars to what res can hold }
    Limit:=high(arr)+1;
    if Limit>=high(res)+1 then
      Limit:=high(res)
    else if Limit<0 then
      Limit:=0;
    CharCount:=Limit;
    if zerobased then
      begin
        ZeroAt:=IndexWord(arr[0],Limit,0);
        if ZeroAt>=0 then
          CharCount:=ZeroAt;
      end;
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),Converted,DefaultSystemCodePage,CharCount);
    res:=Converted;
  end;
  658. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  659. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  660. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an array of WideChar to an AnsiString.
    With zerobased set the conversion stops in front of the first zero
    word; otherwise, or when there is none, the whole array is converted.
    The code page is cp (DefaultSystemCodePage without FPC_HAS_CPSTRING),
    passed through TranslatePlaceholderCP. An effective length of zero
    yields the empty string.
  }
  var
    CharCount : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    CharCount:=-1;
    if zerobased then
      CharCount:=IndexWord(arr,high(arr)+1,0);
    if CharCount=-1 then
      CharCount:=high(arr)+1;
    if CharCount<=0 then
      begin
        Result:='';
        exit;
      end;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),RawByteString(Result),cp,CharCount);
  end;
  687. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  688. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  689. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies an array of WideChar into a WideString.
    With zerobased set the copy stops in front of the first zero word in
    arr; otherwise, or when no zero word is present, the whole array is
    copied.
  }
  var
    CharCount : SizeInt;
  begin
    CharCount:=-1;
    if zerobased then
      CharCount:=IndexWord(arr,high(arr)+1,0);
    { -1 means "no terminator looked for / found": use the full array }
    if CharCount=-1 then
      CharCount:=high(arr)+1;
    SetLength(Result,CharCount);
    Move(arr[0],Pointer(Result)^,CharCount*sizeof(WideChar));
  end;
  705. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  706. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  707. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  procedure fpc_unicodestr_to_chararray(out res: array of AnsiChar; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores the result in res.
    The converted text is truncated to length(res); any remaining
    elements of res are filled with #0.
  }
  var
    Capacity, CopyCount : SizeInt;
    Converted : ansistring;
  begin
    { only touch src[1] when src is not empty }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),Converted,DefaultSystemCodePage,length(src));
    Capacity:=length(res);
    CopyCount:=length(Converted);
    if CopyCount>Capacity then
      CopyCount:=Capacity;
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyCount);
    fillchar(res[CopyCount],Capacity-CopyCount,0);
  {$pop}
  end;
  726. {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  727. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  728. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src from its own code page (StringCodePage(src), passed
    through TranslatePlaceholderCP) to wide chars and stores them in res.
    The converted text is truncated to length(res); any remaining
    elements of res are zeroed.
  }
  var
    Capacity, CopyCount : SizeInt;
    Converted : widestring;
  begin
    { only touch src[1] when src is not empty }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pansichar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),Converted,length(src));
    Capacity:=length(res);
    CopyCount:=length(Converted);
    if CopyCount>Capacity then
      CopyCount:=Capacity;
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyCount*sizeof(widechar));
    fillchar(res[CopyCount],(Capacity-CopyCount)*SizeOf(WideChar),0);
  {$pop}
  end;
  747. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  748. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  749. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to wide chars and stores them
    in res. The converted text is truncated to length(res); any remaining
    elements of res are zeroed.
  }
  var
    Capacity, CopyCount : longint;
    Converted : widestring;
  begin
    { do not access char 1 of an empty shortstring }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pansichar(@src[1]),DefaultSystemCodePage,Converted,length(src));
    Capacity:=length(res);
    CopyCount:=length(Converted);
    if CopyCount>Capacity then
      CopyCount:=Capacity;
  {$push}
  {$r-}
    move(Converted[1],res[0],CopyCount*sizeof(widechar));
    fillchar(res[CopyCount],(Capacity-CopyCount)*SizeOf(WideChar),0);
  {$pop}
  end;
  768. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  769. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  770. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies the chars of src into res, truncated to length(res); any
    remaining elements of res are zeroed.
  }
  var
    Capacity, CopyCount : SizeInt;
  begin
    Capacity:=length(res);
    CopyCount:=length(src);
    if CopyCount>Capacity then
      CopyCount:=Capacity;
  {$push}
  {$r-}
    { src[1] must not be accessed when nothing is to be copied (src may be nil) }
    if CopyCount>0 then
      move(src[1],res[0],CopyCount*SizeOf(WideChar));
    fillchar(res[CopyCount],(Capacity-CopyCount)*SizeOf(WideChar),0);
  {$pop}
  end;
  786. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  787. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  788. {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings.
    Result:
       <0 if S1<S2
        0 if S1=S2
       >0 if S1>S2
    The common prefix is compared word by word with CompareWord; when it
    is equal the difference of the lengths decides.
  }
  var
    Len1, Len2, Common : SizeInt;
  begin
    { same string data (or both nil): equal without looking at the contents }
    if pointer(S1)=pointer(S2) then
      exit(0);
    Len1:=Length(S1);
    Len2:=Length(S2);
    if Len1>Len2 then
      Common:=Len2
    else
      Common:=Len1;
    Result:=CompareWord(S1[1],S2[1],Common);
    if Result=0 then
      Result:=Len1-Len2;
  end;
  814. {$endif FPC_HAS_UNICODESTR_COMPARE}
  815. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  816. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings.
    Result:
        0 if S1=S2
      <>0 if S1<>S2
    Strings of different length are reported as different (-1) without
    comparing any chars.
  }
  var
    Len : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      Result:=0
    else
      begin
        Len:=Length(S1);
        if Len=Length(S2) then
          Result:=CompareWord(S1[1],S2[1],Len)
        else
          Result:=-1;
      end;
  end;
  835. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  836. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  837. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Range check for one-based indexing of the unicode string whose data
    starts at p: raises error 201 unless p is non-nil and
    1 <= index <= stored length.
  }
  begin
    if (p=nil) or (index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  Procedure fpc_UnicodeStr_ZeroBased_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_ZEROBASED_RANGECHECK']; compilerproc;
  {
    Range check for zero-based indexing of the unicode string whose data
    starts at p: raises error 201 unless p is non-nil and
    0 <= index < stored length.
  }
  begin
    if (p=nil) or (index<0) or (index>=PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  848. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  849. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  850. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of S to l chars.
      l<=0            : the reference held by S is released.
      S is nil        : a new string of l chars is allocated.
      S has Ref = 1   : the existing block is kept, and only reallocated
                        when it is too small or more than twice as large
                        as needed (and larger than 32 bytes).
      otherwise       : a new string is allocated, the old contents
                        (at most l chars) are copied over and the old
                        reference is released.
    The result is always zero terminated.
  }
  var
    Block : Pointer;
    HaveBytes, NeedBytes, CopyChars : SizeUInt;
  begin
    if l<=0 then
      begin
        { length=0, deallocate the string }
        fpc_unicodestr_decr_ref(Pointer(S));
        exit;
      end;

    if Pointer(S)=nil then
      { Need a complete new string... }
      Pointer(S):=NewUnicodeString(l)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: resize the existing block if required }
        Block:=Pointer(S)-UnicodeFirstOff;
        HaveBytes:=MemSize(Block);
        NeedBytes:=SizeUInt(l*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
        if (NeedBytes>HaveBytes) or ((HaveBytes>32) and (NeedBytes<=SizeInt(SizeUint(HaveBytes) div 2))) then
          Pointer(S):=reallocmem(Block,NeedBytes)+UnicodeFirstOff;
      end
    else
      begin
        { string data is referenced elsewhere too: copy into a new string }
        Block:=NewUnicodeString(l);
        { also move terminating null }
        CopyChars:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len+1;
        if l<CopyChars then
          CopyChars:=l;
        Move(Pointer(S)^,Block^,CopyChars*Sizeof(UnicodeChar));
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=Block;
      end;

    { Force nil termination in case it gets shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l;
  end;
  895. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  896. {*****************************************************************************
  897. Public functions, In interface.
  898. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  {
    Returns the zero-terminated unicode text at S as a UnicodeString.
    The length is obtained by converting S to a UnicodeString first, the
    copy itself is done by UnicodeCharLenToString.
  }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(UnicodeString(S));
    Result:=UnicodeCharLenToString(S,CharCount);
  end;
  903. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  904. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  {
    Converts Src into the buffer Dest of DestSize chars; simply forwards
    to StringToWideChar and returns its result.
  }
  begin
    exit(StringToWideChar(Src,Dest,DestSize));
  end;
  909. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  function WideCharToString(S : PWideChar) : UnicodeString;
  {
    Returns the zero-terminated wide text at S as a UnicodeString.
    The length is obtained by converting S to a WideString first, the
    copy itself is done by WideCharLenToString.
  }
  var
    CharCount : SizeInt;
  begin
    CharCount:=Length(WideString(S));
    Result:=WideCharLenToString(S,CharCount);
  end;
  914. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  915. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (interpreted in its own code page, StringCodePage(Src))
    to wide chars and stores the result, zero terminated, in Dest.
      Dest     : destination buffer.
      DestSize : size of Dest in wide chars, including room for the
                 terminating #0. At most DestSize-1 chars are copied.
    Returns Dest.
    When Dest is nil or DestSize<=0 there is no room for even the
    terminator, so nothing is written (previously the terminator was
    stored in front of the buffer in that case).
  }
  var
    temp: widestring;
    Len: SizeInt;
  begin
    result:=Dest;
    { no usable buffer: do not write anything }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PAnsiChar(Src),StringCodePage(Src),temp,Length(Src));
    Len:=Length(temp);
    { keep one char free for the terminating #0 }
    if DestSize<=Len then
      Len:=DestSize-1;
    if Len>0 then
      move(temp[1],Dest^,Len*SizeOf(WideChar));
    Dest[Len]:=#0;
  end;
  929. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function UnicodeFromLocaleChars(CodePage, Flags: Cardinal; LocaleStr: PAnsiChar;
    LocaleStrLen: Integer; UnicodeStr: PWideChar; UnicodeStrLen: Integer): Integer; overload;
  {
    Converts LocaleStrLen chars at LocaleStr from code page CodePage to
    wide chars.
    When UnicodeStrLen>0 the result is copied to UnicodeStr, truncated to
    UnicodeStrLen-1 chars and zero terminated, and the number of chars
    copied is returned. When UnicodeStrLen<=0 nothing is written and the
    length of the converted text is returned.
    Flags is not used by this implementation.
  }
  var
    Converted : widestring;
    CharCount : SizeInt;
  begin
    widestringmanager.Ansi2WideMoveProc(LocaleStr,CodePage,Converted,LocaleStrLen);
    CharCount:=Length(Converted);
    { Only move when we have room. }
    if UnicodeStrLen>0 then
      begin
        if CharCount>=UnicodeStrLen then
          CharCount:=UnicodeStrLen-1;
        move(Converted[1],UnicodeStr^,CharCount*SizeOf(WideChar));
        UnicodeStr[CharCount]:=#0;
      end;
    { Return length }
    Result:=CharCount;
  end;
  function UnicodeFromLocaleChars(const LocaleName: AnsiString; Flags: Cardinal;
    LocaleStr: PAnsiChar; LocaleStrLen: Integer; UnicodeStr: PWideChar;
    UnicodeStrLen: Integer): Integer; overload;
  {
    Variant taking a locale name: the name is mapped to a code page with
    LocaleNameToCodePage and the work is delegated to the code page
    overload. Returns 0 when the locale name cannot be mapped.
  }
  var
    CP : TSystemCodePage;
  begin
    Result:=0;
    if LocaleNameToCodePage(LocaleName,CP) then
      Result:=UnicodeFromLocaleChars(CP,Flags,LocaleStr,LocaleStrLen,UnicodeStr,UnicodeStrLen);
  end;
  960. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  961. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  {
    Returns a UnicodeString of Len chars copied from the buffer at S.
  }
  begin
    SetLength(Result,Len);
    { UnicodeChar is two bytes wide }
    Move(S^,Pointer(Result)^,Len*SizeOf(UnicodeChar));
  end;
  967. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the Len chars at Src in Dest (see UnicodeCharLenToString). }
  begin
    Dest:=UnicodeCharLenToString(Src,Len);
  end;

  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the Len chars at Src in Dest, converted to AnsiString. }
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeCharLenToString(Src,Len);
    Dest:=AnsiString(Wide);
  end;

  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Stores the zero-terminated text at S in Dest, converted to AnsiString. }
  var
    Wide : UnicodeString;
  begin
    Wide:=UnicodeCharToString(S);
    Dest:=AnsiString(Wide);
  end;
  980. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  981. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  {
    Returns a UnicodeString of Len chars copied from the buffer at S.
  }
  begin
    SetLength(Result,Len);
    { WideChar is two bytes wide }
    Move(S^,Pointer(Result)^,Len*SizeOf(WideChar));
  end;
  987. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Stores the Len chars at Src in Dest (see WideCharLenToString). }
  begin
    Dest:=WideCharLenToString(Src,Len);
  end;

  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the Len chars at Src in Dest, converted to AnsiString. }
  var
    Wide : UnicodeString;
  begin
    Wide:=WideCharLenToString(Src,Len);
    Dest:=AnsiString(Wide);
  end;

  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Stores the zero-terminated text at S in Dest (see WideCharToString). }
  begin
    Dest:=WideCharToString(S);
  end;

  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Stores the zero-terminated text at S in Dest, converted to AnsiString. }
  var
    Wide : UnicodeString;
  begin
    Wide:=WideCharToString(S);
    Dest:=AnsiString(Wide);
  end;
  { Typed import of the routine exported under the name 'FPC_UNICODESTR_UNIQUE',
    so that it can be called with a UnicodeString argument. }
  Function fpc_unicodestr_Unique_func(Var S : UnicodeString): Pointer; external name 'FPC_UNICODESTR_UNIQUE';

  Procedure UniqueString (Var S : UnicodeString);{$ifdef SYSTEMINLINE}inline;{$endif}
  {
    Calls the 'FPC_UNICODESTR_UNIQUE' routine for S; the pointer it
    returns is not needed here and is discarded.
  }
  begin
    fpc_unicodestr_Unique_func(S);
  end;
  1009. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  1010. {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Makes sure the reference count of the string data S points to is 1.
    A nil string or one with Ref = 1 is left alone. Otherwise the
    contents, including the terminating zero, are copied to a newly
    allocated string, the old reference is released and S is redirected
    to the copy.
    Returns the (possibly new) value of S.
  }
  var
    Copy : Pointer;
    CharCount : SizeInt;
  begin
    Result:=S;
    if (Result=nil) or (PUnicodeRec(Result-UnicodeFirstOff)^.Ref=1) then
      exit;
    CharCount:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
    Copy:=NewUnicodeString(CharCount);
    { +1: take the terminating zero along }
    Move(PUnicodeChar(S)^,Copy^,(CharCount+1)*sizeof(UnicodeChar));
    PUnicodeRec(Copy-UnicodeFirstOff)^.len:=CharCount;
    fpc_unicodestr_decr_ref(Pointer(S));  { Thread safe }
    S:=Copy;
    Result:=Copy;
  end;
  1032. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  1033. {$ifndef FPC_HAS_UNICODESTR_COPY}
  1034. {$define FPC_HAS_UNICODESTR_COPY}
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns the substring of S starting at the one-based position Index
    and at most Size chars long. An Index below 1 is treated as 1, Size is
    clipped to the chars available from Index on; when nothing remains
    the result is the empty string.
  }
  var
    Avail : SizeInt;
    Fresh : Pointer;
  begin
    Fresh:=nil;
    { switch to a zero-based start position }
    dec(Index);
    if Index<0 then
      Index:=0;
    { Cannot overflow as both Length(S) and Index are non-negative. }
    Avail:=Length(S)-Index;
    if Size>Avail then
      Size:=Avail;
    if Size>0 then
      begin
        Fresh:=NewUnicodeString(Size);
        Move(PUnicodeChar(S)[Index],Fresh^,Size*sizeof(UnicodeChar));
        PUnicodeRec(Fresh-UnicodeFirstOff)^.Len:=Size;
        PUnicodeChar(Fresh+Size*sizeof(UnicodeChar))^:=#0;
      end;
    { release whatever the result variable referenced before replacing it }
    fpc_unicodestr_decr_ref(Pointer(Result));
    Pointer(Result):=Fresh;
  end;
  1057. {$endif FPC_HAS_UNICODESTR_COPY}
  1058. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1059. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of Substr in
    Source at or after position Offset, or 0 when there is none.
    0 is also returned for an empty Substr and for an Offset outside
    1..Length(Source).
  }
  var
    Cursor, LastStart, SrcLen, SubLen, Hit : SizeInt;
  begin
    Result:=0;
    SrcLen:=Length(Source);
    SubLen:=Length(Substr);
    if (SubLen<=0) or (Offset<=0) or (Offset>SrcLen) then
      exit;
    { last position at which Substr still fits into Source }
    LastStart:=SrcLen-SubLen+1;
    Cursor:=Offset;
    while Cursor<=LastStart do
      begin
        { jump to the next occurrence of the first char of Substr }
        Hit:=IndexWord(Source[Cursor],LastStart-Cursor+1,word(Substr[1]));
        if Hit<0 then
          exit;
        inc(Cursor,Hit);
        if CompareWord(Substr[1],Source[Cursor],SubLen)=0 then
          exit(Cursor);
        inc(Cursor);
      end;
  end;
  1082. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1083. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1084. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1085. { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of c in s at
    or after position Offset, or 0 when there is none or Offset lies
    outside 1..length(s).
  }
  var
    TotalLen, Hit : SizeInt;
  begin
    Result:=0;
    TotalLen:=length(s);
    if (Offset<=0) or (Offset>TotalLen) then
      exit;
    Hit:=IndexWord(s[Offset],TotalLen-Offset+1,word(c));
    if Hit>=0 then
      Result:=Offset+Hit;
  end;
  1099. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1100. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1101. block, which is significant bloat without any sensible speed improvement. }
  Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Searches for the RawByteString c, converted to UnicodeString, in s. }
  begin
    exit(Pos(UnicodeString(c),s,Offset));
  end;

  Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Searches for the ShortString c, converted to UnicodeString, in s. }
  begin
    exit(Pos(UnicodeString(c),s,Offset));
  end;

  Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  { Searches for c in the RawByteString s, converted to UnicodeString. }
  begin
    exit(Pos(c,UnicodeString(s),Offset));
  end;
  1114. {$ifndef FPC_HAS_UNICODESTR_OF_CHAR}
  1115. {$define FPC_HAS_UNICODESTR_OF_CHAR}
  Function StringOfChar(c : Unicodechar;l : SizeInt) : UnicodeString;
  {
    Returns a UnicodeString of length l in which every char is c.
  }
  begin
    SetLength(Result,l);
    { the actual length is used for the fill, not l itself }
    FillWord(Pointer(Result)^,Length(Result),word(c));
  end;
  1121. {$endif}
  1122. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1123. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1124. { Faster version for a AnsiChar alone. Must be implemented because }
  1125. { pos(c: AnsiChar; const s: shortstring) also exists, so otherwise }
  1126. { using pos(AnsiChar,pansichar) will always call the shortstring version }
  1127. { (exact match for first argument), also with $h+ (JM) }
  Function Pos (c : AnsiChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of c
    (converted to a UnicodeChar) in s at or after position Offset, or 0
    when there is none or Offset lies outside 1..length(s).
  }
  var
    TotalLen, Hit : SizeInt;
  begin
    Result:=0;
    TotalLen:=length(s);
    if (Offset<=0) or (Offset>TotalLen) then
      exit;
    Hit:=IndexWord(s[Offset],TotalLen-Offset+1,word(unicodechar(c)));
    if Hit>=0 then
      Result:=Offset+Hit;
  end;
  1141. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1142. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1143. {$define FPC_HAS_DELETE_UNICODESTR}
  Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes Size chars from S starting at the one-based position Index.
    Nothing happens when Index lies outside 1..Length(S) or Size<=0.
    A Size reaching beyond the end of S removes everything from Index on.
  }
  var
    OldLen, Tail : SizeInt;
  begin
    OldLen:=Length(S);
    if (Size<=0) or (Index<=0) or (Index>OldLen) then
      exit;
    UniqueString(S);
    { (Size+Index) will overflow if Size=MaxInt. }
    Tail:=OldLen-Index;
    if Size>Tail then
      { cut off the rest of the string, nothing has to be moved }
      Size:=Tail+1
    else
      begin
        Dec(Index);
        { shift the remainder (including the terminating zero) down }
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(OldLen-Index-Size+1)*sizeof(UnicodeChar));
      end;
    SetLength(S,OldLen-Size);
  end;
  1162. {$endif FPC_HAS_DELETE_UNICODESTR}
  1163. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1164. {$define FPC_HAS_INSERT_UNICODESTR}
  Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the one-based position Index.
    Index is clamped to 1..Length(S)+1; an empty Source leaves S untouched.
    The result is assembled in a temporary string and then assigned to S.
  }
  var
    Merged : UnicodeString;
    OldLen, SrcLen : SizeInt;
  begin
    SrcLen:=Length(Source);
    if SrcLen=0 then
      exit;
    OldLen:=Length(S);
    if Index<=0 then
      Index:=1;
    if Index>OldLen then
      Index:=OldLen+1;
    { Index now counts the chars of S that stay in front of Source }
    Dec(Index);
    SetLength(Merged,SrcLen+OldLen);
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
    Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],SrcLen*sizeof(UnicodeChar));
    if (OldLen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[SrcLen+Index],(OldLen-Index)*sizeof(UnicodeChar));
    S:=Merged;
  end;
  1186. {$endif FPC_HAS_INSERT_UNICODESTR}
  1187. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1188. {$define FPC_HAS_UPCASE_UNICODECHAR}
  Function UpCase(c:UnicodeChar):UnicodeChar;
  {
    Upper-cases a single UnicodeChar: c is turned into a string, run
    through widestringmanager.UpperUnicodeStringProc, and the first char
    of the outcome is returned.
  }
  var
    Upper : UnicodeString;
  begin
    Upper:=widestringmanager.UpperUnicodeStringProc(UnicodeString(c));
    Result:=Upper[1];
  end;
  1193. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1194. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1195. {$define FPC_HAS_UPCASE_UNICODESTR}
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns s as converted by widestringmanager.UpperUnicodeStringProc. }
  begin
    exit(widestringmanager.UpperUnicodeStringProc(s));
  end;
  1200. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1201. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1202. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  {
    Lower-cases a single UnicodeChar: c is turned into a string, run
    through widestringmanager.LowerUnicodeStringProc, and the first char
    of the outcome is returned.
  }
  var
    Lower : UnicodeString;
  begin
    Lower:=widestringmanager.LowerUnicodeStringProc(UnicodeString(c));
    Result:=Lower[1];
  end;
  1207. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1208. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1209. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  function LowerCase(const s : UnicodeString) : UnicodeString;
  { Returns s as converted by widestringmanager.LowerUnicodeStringProc. }
  begin
    exit(widestringmanager.LowerUnicodeStringProc(s));
  end;
  1214. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1215. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1216. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  {
    Sets S to a string of Len chars. When Buf is not nil and Len>0 the
    chars are copied from Buf; otherwise only the length is set.
  }
  begin
    SetLength(S,Len);
    if (Len>0) and (Buf<>nil) then
      Move(Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  1223. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1224. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1225. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PAnsiChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  {
    Sets S from an ansi buffer. When Buf is not nil and Len>0 the Len
    chars at Buf are converted from DefaultSystemCodePage; otherwise only
    the length of S is set to Len.
  }
  begin
    if (Buf=nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1233. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1234. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  {
    Val for floating point values with a UnicodeString source: S is
    converted to a ShortString and handed to Val. Strings longer than 255
    chars are rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1248. {$endif}
  1249. {$ifndef FPC_STR_ENUM_INTERN}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  {
    Val helper for enumeration types with a UnicodeString source: s is
    converted to a ShortString and handed to Val.
    Strings longer than 255 chars are rejected with code=256; the result
    is 0 in that case, like in the other fpc_Val_*_UnicodeStr helpers
    (it used to be left undefined).
    NOTE(review): str2ordindex is not used by this routine - confirm that
    this is intended.
  }
  var
    ss: ShortString;
  begin
    { defined result for the "string too long" path }
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=ShortString(s);
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  1262. {$endif FPC_STR_ENUM_INTERN}
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  {
    Val for Currency with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      begin
        Result:=0;
        Code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr ({$ifndef VER3_2}DestSize: SizeInt;{$endif VER3_2} Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  {
    Val for unsigned integers with a UnicodeString source: S is converted
    to a ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
    DestSize (when present) is not used by this routine.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  {
    Val for signed integers with a UnicodeString source: S is converted
    to a ShortString and parsed by int_Val_SInt_ShortStr, to which
    DestSize is passed on. Strings longer than 255 chars are rejected
    with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Result:=int_Val_SInt_ShortStr(DestSize,Narrow,Code);
      end
    else
      Code:=256;
  end;
  1304. {$ifndef CPU64}
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  {
    Val for qword with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;

  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  {
    Val for Int64 with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1331. {$endif CPU64}
  1332. {$if defined(CPU16) or defined(CPU8)}
  Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  {
    Val for longword with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;

  Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  {
    Val for LongInt with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;

  Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  {
    Val for word with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;

  Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  {
    Val for SmallInt with a UnicodeString source: S is converted to a
    ShortString and handed to Val. Strings longer than 255 chars are
    rejected with Code=256 and a result of 0.
  }
  var
    Narrow : ShortString;
  begin
    Result:=0;
    if length(S)<=255 then
      begin
        Narrow:=ShortString(S);
        Val(Narrow,Result,Code);
      end
    else
      Code:=256;
  end;
  1385. {$endif CPU16 or CPU8}
  1386. {$ifndef FPUNONE}
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  {
    Str for floating point values with a UnicodeString destination: d is
    formatted into a shortstring by str_real (len, fr and the real type rt
    are passed on) and the outcome is converted to UnicodeString.
  }
  var
    Formatted : shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),Formatted);
    s:=UnicodeString(Formatted);
  end;
  1394. {$endif}
  1395. {$ifndef FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  {
    Str for enumeration values with a UnicodeString destination: the
    text is produced by fpc_shortstr_enum and then converted to
    UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Formatted);
    s:=UnicodeString(Formatted);
  end;
  1403. {$endif FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  {
    Str for boolean values with a UnicodeString destination: the text is
    produced by fpc_shortstr_bool and then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    fpc_shortstr_bool(b,len,Formatted);
    s:=UnicodeString(Formatted);
  end;
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  {
    Str for Currency with a UnicodeString destination: c is formatted as
    c:len:fr into a shortstring, which is then converted to UnicodeString.
  }
  var
    Formatted : shortstring;
  begin
    str(c:len:fr,Formatted);
    s:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for signed integers with a UnicodeString destination: v is
    formatted as v:Len into a ShortString, which is then converted to
    UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for unsigned integers with a UnicodeString destination: v is
    formatted as v:Len into a ShortString, which is then converted to
    UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  1432. {$ifndef CPU64}
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for Int64 with a UnicodeString destination: v is formatted as
    v:Len into a ShortString, which is then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;

  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for Qword with a UnicodeString destination: v is formatted as
    v:Len into a ShortString, which is then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  1447. {$endif CPU64}
  1448. {$if defined(CPU16) or defined(CPU8)}
  Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for LongInt with a UnicodeString destination: v is formatted as
    v:Len into a ShortString, which is then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;

  Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for LongWord with a UnicodeString destination: v is formatted as
    v:Len into a ShortString, which is then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;

  Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for SmallInt with a UnicodeString destination: v is formatted as
    v:Len into a ShortString, which is then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;

  Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  {
    Str for Word with a UnicodeString destination: v is formatted as
    v:Len into a ShortString, which is then converted to UnicodeString.
  }
  var
    Formatted : ShortString;
  begin
    Str(v:Len,Formatted);
    S:=UnicodeString(Formatted);
  end;
  1477. {$endif CPU16 or CPU8}
  function UnicodeToUtf8(Dest: PAnsiChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  {
    Convenience variant for a zero-terminated Source: the source length is
    taken from Length(Source) and the work is delegated to the overload
    taking explicit sizes. A nil Source yields 0.
  }
  begin
    Result:=0;
    if assigned(Source) then
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source));
  end;
  1485. function UnicodeToUtf8(Dest: PAnsiChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1486. {$ifdef EXCLUDE_COMPLEX_PROCS}
  1487. begin
  1488. runerror(217);
  1489. end;
  1490. {$else EXCLUDE_COMPLEX_PROCS}
  1491. var
  1492. i,j : SizeUInt;
  1493. lw : longword;
  1494. begin
  1495. result:=0;
  1496. if source=nil then
  1497. exit;
  1498. i:=0;
  1499. j:=0;
  1500. if assigned(Dest) then
  1501. begin
  1502. while (i<SourceChars) and (j<MaxDestBytes) do
  1503. begin
  1504. lw:=ord(Source[i]);
  1505. case lw of
  1506. 0..$7f:
  1507. begin
  1508. Dest[j]:=AnsiChar(lw);
  1509. inc(j);
  1510. end;
  1511. $80..$7ff:
  1512. begin
  1513. if j+1>=MaxDestBytes then
  1514. break;
  1515. Dest[j]:=AnsiChar($c0 or (lw shr 6));
  1516. Dest[j+1]:=AnsiChar($80 or (lw and $3f));
  1517. inc(j,2);
  1518. end;
  1519. $800..$d7ff,$e000..$ffff:
  1520. begin
  1521. if j+2>=MaxDestBytes then
  1522. break;
  1523. Dest[j]:=AnsiChar($e0 or (lw shr 12));
  1524. Dest[j+1]:=AnsiChar($80 or ((lw shr 6) and $3f));
  1525. Dest[j+2]:=AnsiChar($80 or (lw and $3f));
  1526. inc(j,3);
  1527. end;
  1528. $d800..$dbff:
  1529. {High Surrogates}
  1530. begin
  1531. if j+3>=MaxDestBytes then
  1532. break;
  1533. if (i+1<sourcechars) and
  1534. (word(Source[i+1]) >= $dc00) and
  1535. (word(Source[i+1]) <= $dfff) then
  1536. begin
  1537. { $d7c0 is ($d800 - ($10000 shr 10)) }
  1538. lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
  1539. Dest[j]:=AnsiChar($f0 or (lw shr 18));
  1540. Dest[j+1]:=AnsiChar($80 or ((lw shr 12) and $3f));
  1541. Dest[j+2]:=AnsiChar($80 or ((lw shr 6) and $3f));
  1542. Dest[j+3]:=AnsiChar($80 or (lw and $3f));
  1543. inc(j,4);
  1544. inc(i);
  1545. end;
  1546. end;
  1547. end;
  1548. inc(i);
  1549. end;
  1550. if j>SizeUInt(MaxDestBytes-1) then
  1551. j:=MaxDestBytes-1;
  1552. Dest[j]:=#0;
  1553. end
  1554. else
  1555. begin
  1556. while i<SourceChars do
  1557. begin
  1558. case word(Source[i]) of
  1559. $0..$7f:
  1560. inc(j);
  1561. $80..$7ff:
  1562. inc(j,2);
  1563. $800..$d7ff,$e000..$ffff:
  1564. inc(j,3);
  1565. $d800..$dbff:
  1566. begin
  1567. if (i+1<sourcechars) and
  1568. (word(Source[i+1]) >= $dc00) and
  1569. (word(Source[i+1]) <= $dfff) then
  1570. begin
  1571. inc(j,4);
  1572. inc(i);
  1573. end;
  1574. end;
  1575. end;
  1576. inc(i);
  1577. end;
  1578. end;
  1579. result:=j+1;
  1580. end;
  1581. {$endif EXCLUDE_COMPLEX_PROCS}
  1582. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PAnsiChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1583. begin
  1584. if assigned(Source) then
  1585. Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True)
  1586. else
  1587. Result:=0;
  1588. end;
{ Four-argument overload: forwards to the five-argument Utf8ToUnicode with
  IgnoreInvalid=True and returns its result unchanged. }
function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=Utf8ToUnicode(Dest,MaxDestChars,Source,SourceBytes,True);
end;
  1593. function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
  1594. {$ifdef EXCLUDE_COMPLEX_PROCS}
  1595. begin
  1596. runerror(217);
  1597. end;
  1598. {$else EXCLUDE_COMPLEX_PROCS}
  1599. const
  1600. UNICODE_INVALID=63;
  1601. var
  1602. InputUTF8: SizeUInt;
  1603. IBYTE: BYTE;
  1604. OutputUnicode: SizeUInt;
  1605. PRECHAR: SizeUInt;
  1606. TempBYTE: BYTE;
  1607. CharLen: SizeUint;
  1608. LookAhead: SizeUInt;
  1609. UC: SizeUInt;
  1610. begin
  1611. if not assigned(Source) then
  1612. begin
  1613. result:=0;
  1614. exit;
  1615. end;
  1616. result:=SizeUInt(-1);
  1617. InputUTF8:=0;
  1618. OutputUnicode:=0;
  1619. PreChar:=0;
  1620. if Assigned(Dest) Then
  1621. begin
  1622. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1623. begin
  1624. IBYTE:=byte(Source[InputUTF8]);
  1625. if (IBYTE and $80) = 0 then
  1626. begin
  1627. // One character US-ASCII, convert it to unicode
  1628. // Commented code to convert LF to CRLF has been removed
  1629. Dest[OutputUnicode]:=WideChar(IBYTE);
  1630. inc(OutputUnicode);
  1631. PreChar:=IBYTE;
  1632. inc(InputUTF8);
  1633. end
  1634. else
  1635. begin
  1636. TempByte:=IBYTE;
  1637. CharLen:=0;
  1638. while (TempBYTE and $80)<>0 do
  1639. begin
  1640. TempBYTE:=(TempBYTE shl 1) and $FE;
  1641. inc(CharLen);
  1642. end;
  1643. //Test for the "CharLen" conforms UTF-8 string
  1644. //This means the 10xxxxxx pattern.
  1645. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1646. begin
  1647. //Insuficient chars in string to decode
  1648. //UTF-8 array. Fallback to single AnsiChar.
  1649. CharLen:= 1;
  1650. end;
  1651. for LookAhead := 1 to CharLen-1 do
  1652. begin
  1653. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1654. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1655. begin
  1656. //Invalid UTF-8 sequence, fallback.
  1657. CharLen:= LookAhead;
  1658. break;
  1659. end;
  1660. end;
  1661. UC:=$FFFF;
  1662. case CharLen of
  1663. 1: begin
  1664. //Not valid UTF-8 sequence
  1665. UC:=UNICODE_INVALID;
  1666. end;
  1667. 2: begin
  1668. //Two bytes UTF, convert it
  1669. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1670. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1671. if UC <= $7F then
  1672. begin
  1673. //Invalid UTF sequence.
  1674. UC:=UNICODE_INVALID;
  1675. end;
  1676. end;
  1677. 3: begin
  1678. //Three bytes, convert it to unicode
  1679. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1680. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1681. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1682. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1683. begin
  1684. //Invalid UTF-8 sequence
  1685. UC:= UNICODE_INVALID;
  1686. End;
  1687. end;
  1688. 4: begin
  1689. //Four bytes, convert it to two unicode characters
  1690. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1691. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1692. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1693. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1694. if (UC < $10000) or (UC > $10FFFF) then
  1695. begin
  1696. UC:= UNICODE_INVALID;
  1697. end
  1698. else
  1699. begin
  1700. { only store pair if room }
  1701. dec(UC,$10000);
  1702. if (OutputUnicode<MaxDestChars-1) then
  1703. begin
  1704. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1705. inc(OutputUnicode);
  1706. UC:=(UC and $3ff) + $DC00;
  1707. end
  1708. else
  1709. begin
  1710. InputUTF8:= InputUTF8 + CharLen;
  1711. { don't store anything }
  1712. CharLen:=0;
  1713. end;
  1714. end;
  1715. end;
  1716. 5,6,7: begin
  1717. //Invalid UTF8 to unicode conversion,
  1718. //mask it as invalid UNICODE too.
  1719. UC:=UNICODE_INVALID;
  1720. end;
  1721. end;
  1722. if CharLen > 0 then
  1723. begin
  1724. if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
  1725. HandleError(231); // Will be converted to EConversionError in sysutils
  1726. PreChar:=UC;
  1727. Dest[OutputUnicode]:=WideChar(UC);
  1728. inc(OutputUnicode);
  1729. end;
  1730. InputUTF8:= InputUTF8 + CharLen;
  1731. end;
  1732. end;
  1733. Result:=OutputUnicode+1;
  1734. end
  1735. else
  1736. begin
  1737. while (InputUTF8<SourceBytes) do
  1738. begin
  1739. IBYTE:=byte(Source[InputUTF8]);
  1740. if (IBYTE and $80) = 0 then
  1741. begin
  1742. // One character US-ASCII, convert it to unicode
  1743. // Commented code to convert LF to CRLF has been removed
  1744. inc(OutputUnicode);
  1745. PreChar:=IBYTE;
  1746. inc(InputUTF8);
  1747. end
  1748. else
  1749. begin
  1750. TempByte:=IBYTE;
  1751. CharLen:=0;
  1752. while (TempBYTE and $80)<>0 do
  1753. begin
  1754. TempBYTE:=(TempBYTE shl 1) and $FE;
  1755. inc(CharLen);
  1756. end;
  1757. //Test for the "CharLen" conforms UTF-8 string
  1758. //This means the 10xxxxxx pattern.
  1759. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1760. begin
  1761. //Insuficient chars in string to decode
  1762. //UTF-8 array. Fallback to single AnsiChar.
  1763. CharLen:= 1;
  1764. end;
  1765. for LookAhead := 1 to CharLen-1 do
  1766. begin
  1767. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1768. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1769. begin
  1770. //Invalid UTF-8 sequence, fallback.
  1771. CharLen:= LookAhead;
  1772. break;
  1773. end;
  1774. end;
  1775. UC:=$FFFF;
  1776. case CharLen of
  1777. 1: begin
  1778. //Not valid UTF-8 sequence
  1779. UC:=UNICODE_INVALID;
  1780. end;
  1781. 2: begin
  1782. //Two bytes UTF, convert it
  1783. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1784. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1785. if UC <= $7F then
  1786. begin
  1787. //Invalid UTF sequence.
  1788. UC:=UNICODE_INVALID;
  1789. end;
  1790. end;
  1791. 3: begin
  1792. //Three bytes, convert it to unicode
  1793. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1794. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1795. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1796. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1797. begin
  1798. //Invalid UTF-8 sequence
  1799. UC:= UNICODE_INVALID;
  1800. end;
  1801. end;
  1802. 4: begin
  1803. //Four bytes, convert it to two unicode characters
  1804. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1805. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1806. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1807. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1808. if (UC < $10000) or (UC > $10FFFF) then
  1809. UC:= UNICODE_INVALID
  1810. else
  1811. { extra character character }
  1812. inc(OutputUnicode);
  1813. end;
  1814. 5,6,7: begin
  1815. //Invalid UTF8 to unicode conversion,
  1816. //mask it as invalid UNICODE too.
  1817. UC:=UNICODE_INVALID;
  1818. end;
  1819. end;
  1820. if CharLen > 0 then
  1821. begin
  1822. if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
  1823. HandleError(231); // Will be converted to EConversionError in sysutils
  1824. PreChar:=UC;
  1825. inc(OutputUnicode);
  1826. end;
  1827. InputUTF8:= InputUTF8 + CharLen;
  1828. end;
  1829. end;
  1830. Result:=OutputUnicode+1;
  1831. end;
  1832. end;
  1833. {$endif EXCLUDE_COMPLEX_PROCS}
{ RawByteString overload: converts s to a UnicodeString with a typecast and
  passes it to the UnicodeString overload of UTF8Encode. }
function UTF8Encode(const s : RawByteString) : RawByteString; inline;
begin
  Result:=UTF8Encode(UnicodeString(s));
end;
  1838. {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1839. {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1840. function UTF8Encode(const s : UnicodeString) : RawByteString;
  1841. var
  1842. i : SizeInt;
  1843. hs : UTF8String;
  1844. begin
  1845. result:='';
  1846. if Length(s)=0 then
  1847. exit;
  1848. SetLength(hs,length(s)*3);
  1849. i:=UnicodeToUtf8(pansichar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  1850. if i>0 then
  1851. begin
  1852. SetLength(hs,i-1);
  1853. result:=hs;
  1854. end;
  1855. end;
  1856. {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1857. {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  1858. {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  1859. function UTF8Decode(const s : RawByteString): UnicodeString;
  1860. var
  1861. i : SizeInt;
  1862. hs : UnicodeString;
  1863. begin
  1864. result:='';
  1865. if Length(s)=0 then
  1866. exit;
  1867. SetLength(hs,length(s));
  1868. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pansichar(s),length(s));
  1869. if i>0 then
  1870. begin
  1871. SetLength(hs,i-1);
  1872. result:=hs;
  1873. end;
  1874. end;
  1875. {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
{ Returns the result of Utf8Encode applied to s. }
function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=Utf8Encode(s);
end;
  1880. function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  1881. begin
  1882. Result:=RawByteString(Utf8Decode(s));
  1883. end;
  1884. {$ifdef FPC_HAS_FEATURE_DYNARRAYS}
  1885. procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
  1886. var
  1887. i, reslen: sizeint;
  1888. w: longint;
  1889. begin
  1890. reslen:=0;
  1891. i:=0;
  1892. { calculate required length }
  1893. while (i<len) do
  1894. begin
  1895. if (p[i]<=#$d7ff) or (p[i]>=#$e000) then
  1896. inc(i)
  1897. else if (p[i]<=#$dbff) and
  1898. (i+1<len) and
  1899. (p[i+1]>=#$dc00) and
  1900. (p[i+1]<=#$dfff) then
  1901. inc(i,2)
  1902. else
  1903. inc(i);
  1904. inc(reslen);
  1905. end;
  1906. SetLength(res,reslen+1); { +1 for null termination }
  1907. reslen:=0;
  1908. i:=0;
  1909. { do conversion }
  1910. while (i<len) do
  1911. begin
  1912. w:=ord(p[i]);
  1913. if (w<=$d7ff) or (w>=$e000) then
  1914. res[reslen]:=w
  1915. else if (w<=$dbff) and
  1916. (i+1<len) and
  1917. (p[i+1]>=#$dc00) and
  1918. (p[i+1]<=#$dfff) then
  1919. begin
  1920. res[reslen]:=(UCS4Char(w-$d7c0) shl 10)+(UCS4Char(p[i+1]) xor $dc00);
  1921. inc(i);
  1922. end
  1923. else { invalid surrogate pair }
  1924. res[reslen]:=w;
  1925. inc(i);
  1926. inc(reslen);
  1927. end;
  1928. res[reslen]:=0;
  1929. end;
{$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
{$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
{ Passes the characters and length of s to UCS4Encode, which fills the
  function result. }
function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
begin
  UCS4Encode(PWideChar(s),Length(s),result);
end;
{$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
{$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
{$define FPC_HAS_WIDESTR_TO_UCS4STRING}
{ Passes the characters and length of s to UCS4Encode, which fills the
  function result. }
function WideStringToUCS4String(const s : WideString) : UCS4String;
begin
  UCS4Encode(PWideChar(s),Length(s),result);
end;
{$endif FPC_HAS_WIDESTR_TO_UCS4STRING}
  1944. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1945. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  1946. { dest should point to previously allocated wide/unicodestring }
  1947. procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
  1948. var
  1949. i: sizeint;
  1950. nc: UCS4Char;
  1951. begin
  1952. for i:=0 to length(s)-2 do { -2 because s contains explicit terminating #0 }
  1953. begin
  1954. nc:=s[i];
  1955. if (nc<=$ffff) then
  1956. dest^:=widechar(nc)
  1957. else if (dword(nc)<=$10ffff) then
  1958. begin
  1959. dest^:=widechar(nc shr 10 + $d7c0);
  1960. { subtracting $10000 doesn't change low 10 bits }
  1961. dest[1]:=widechar(nc and $3ff + $dc00);
  1962. inc(dest);
  1963. end
  1964. else { invalid code point }
  1965. dest^:='?';
  1966. inc(dest);
  1967. end;
  1968. end;
  1969. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  1970. var
  1971. i : SizeInt;
  1972. reslen : SizeInt;
  1973. begin
  1974. reslen:=0;
  1975. for i:=0 to length(s)-2 do { skip terminating #0 }
  1976. Inc(reslen,1+ord((s[i]>$ffff) and (cardinal(s[i])<=$10ffff)));
  1977. SetLength(result,reslen);
  1978. UCS4Decode(s,pointer(result));
  1979. end;
  1980. function UCS4StringToWideString(const s : UCS4String) : WideString;
  1981. var
  1982. i : SizeInt;
  1983. reslen : SizeInt;
  1984. begin
  1985. reslen:=0;
  1986. for i:=0 to length(s)-2 do { skip terminating #0 }
  1987. Inc(reslen,1+ord((s[i]>$ffff) and (cardinal(s[i])<=$10ffff)));
  1988. SetLength(result,reslen);
  1989. UCS4Decode(s,pointer(result));
  1990. end;
  1991. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1992. {$endif FPC_HAS_FEATURE_DYNARRAYS}
  1993. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
{ Messages written to StdErr by unimplementedunicodestring. }
const
  SNoUnicodestrings = 'This binary has no string conversion support compiled in.';
  SRecompileWithUnicodestrings = 'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';

{ Reports that a unicode string operation has no implementation.
  If console I/O is compiled in, HAS_WIDESTRINGMANAGER is not defined and
  FPC_SYSTEM_NO_VERBOSE_UNICODEERROR is not defined, the two messages above
  are written to StdErr when IsConsole is set. In all configurations run-time
  error 234 is then raised through HandleErrorAddrFrameInd. }
procedure unimplementedunicodestring;
begin
{$ifdef FPC_HAS_FEATURE_CONSOLEIO}
{$ifndef HAS_WIDESTRINGMANAGER}
{$ifndef FPC_SYSTEM_NO_VERBOSE_UNICODEERROR}
  If IsConsole then
    begin
      Writeln(StdErr,SNoUnicodestrings);
      Writeln(StdErr,SRecompileWithUnicodestrings);
    end;
{$endif FPC_SYSTEM_NO_VERBOSE_UNICODEERROR}
{$endif HAS_WIDESTRINGMANAGER}
{$endif FPC_HAS_FEATURE_CONSOLEIO}
  HandleErrorAddrFrameInd(234{RuntimeErrorExitCodes[reCodesetConversion]},get_pc_addr,get_frame);
end;
  2012. function StringElementSize(const S: UnicodeString): Word; overload;
  2013. begin
  2014. if assigned(Pointer(S)) then
  2015. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
  2016. else
  2017. Result:=SizeOf(UnicodeChar);
  2018. end;
  2019. function StringRefCount(const S: UnicodeString): SizeInt; overload;
  2020. begin
  2021. if assigned(Pointer(S)) then
  2022. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
  2023. else
  2024. Result:=0;
  2025. end;
  2026. function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  2027. begin
  2028. {$ifdef FPC_HAS_CPSTRING}
  2029. if assigned(Pointer(S)) then
  2030. Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
  2031. else
  2032. {$endif FPC_HAS_CPSTRING}
  2033. Result:=DefaultUnicodeCodePage;
  2034. end;
  2035. {$push}
  2036. {$warnings off}
{ Placeholder that initunicodestringmanager installs as both the upper- and
  lower-case UnicodeString handler. It only calls unimplementedunicodestring;
  the function result is never assigned (the stubs are compiled with
  warnings switched off). }
function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
begin
  unimplementedunicodestring;
end;
{ Placeholder that initunicodestringmanager installs as the UnicodeString
  comparison handler. It only calls unimplementedunicodestring; the function
  result is never assigned. }
function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
begin
  unimplementedunicodestring;
end;
{ Placeholder that initunicodestringmanager installs as both the upper- and
  lower-case WideString handler. It only calls unimplementedunicodestring;
  the function result is never assigned. }
function StubWideCase(const s: WideString): WideString;
begin
  unimplementedunicodestring;
end;
{ Placeholder that initunicodestringmanager installs as the WideString
  comparison handler. It only calls unimplementedunicodestring; the function
  result is never assigned. }
function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
begin
  unimplementedunicodestring;
end;
  2053. {$pop}
{ Fills the global widestringmanager with the built-in handlers.
  When HAS_WIDESTRINGMANAGER is not defined the record is first reset with
  Default() and the move, case-conversion and standard-code-page handlers are
  installed (case conversion uses the Stub* placeholders).
  The comparison and character-length handlers are assigned in every
  configuration. }
procedure initunicodestringmanager;
begin
{$ifndef HAS_WIDESTRINGMANAGER}
  widestringmanager:=Default(TUnicodeStringManager);
  { ansi -> wide uses the unicode mover when both string types are the same }
{$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
{$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
  widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  widestringmanager.Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  widestringmanager.UpperWideStringProc:=@StubWideCase;
  widestringmanager.LowerWideStringProc:=@StubWideCase;
  widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
  widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
  widestringmanager.UpperUnicodeStringProc:=@StubUnicodeCase;
  widestringmanager.LowerUnicodeStringProc:=@StubUnicodeCase;
  widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
{$endif HAS_WIDESTRINGMANAGER}
  { assigned regardless of HAS_WIDESTRINGMANAGER }
  widestringmanager.CompareWideStringProc:=@StubCompareWideString;
//  widestringmanager.CompareTextWideStringProc:=@StubCompareWideString;
  widestringmanager.CompareUnicodeStringProc:=@StubCompareUnicodeString;
  widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
  widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
end;
  2078. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  2079. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2080. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2081. Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
  2082. Begin
  2083. widestringmanager.Unicode2AnsiMoveProc(punicodechar(Str),Result,
  2084. DefaultFileSystemCodePage,Length(Str));
  2085. End;
  2086. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2087. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2088. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2089. Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
  2090. Begin
  2091. widestringmanager.Unicode2AnsiMoveProc(@arr[0],Result,
  2092. DefaultFileSystemCodePage,length(pwidechar(@arr[0])));
  2093. End;
  2094. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
{ Returns a copy of Str on which SetCodePage is called with
  DefaultFileSystemCodePage and Convert=True. }
Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
Begin
  Result:=Str;
  SetCodePage(Result,DefaultFileSystemCodePage,True);
End;
{ Delphi compatibility: always interpret the data in the string as UTF-8,
  ignore any codepage.
  Returns UTF8Decode(S). }
function UTF8ToString(const S: RawByteString): UnicodeString; inline;
begin
  Result := UTF8Decode(S);
end;
{ Returns UTF8Decode(S); same body as UTF8ToString for RawByteString. }
function UTF8ToUnicodeString(const s : RawByteString): UnicodeString;
begin
  Result := UTF8Decode(S);
end;
  2110. function UTF8ToString(const S: ShortString): UnicodeString;
  2111. Var
  2112. rs: RawByteString;
  2113. begin
  2114. rs:=S;
  2115. Result := UTF8Decode(rs);
  2116. end;
{ ShortString overload: returns UTF8ToString(S). }
function UTF8ToUnicodeString(const S: ShortString): unicodestring;
begin
  Result:=UTF8ToString(S);
end;
  2121. function UTF8ToString(const S: PAnsiChar): UnicodeString;
  2122. var
  2123. rs: RawByteString;
  2124. Count: Integer;
  2125. begin
  2126. Count := length(S);
  2127. SetLength(rs, Count);
  2128. if Count > 0 then
  2129. fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
  2130. Result := UTF8ToString(rs);
  2131. end;
{ PAnsiChar overload: returns UTF8ToString(S). }
function UTF8ToUnicodeString(const S: PAnsiChar): unicodestring;
begin
  Result:=UTF8ToString(S);
end;
  2136. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2137. are as well }
  2138. {$ifndef CPUJVM}
  2139. function UTF8ToString(const S: array of AnsiChar): UnicodeString;
  2140. var
  2141. rs: RawByteString;
  2142. Count: Integer;
  2143. begin
  2144. Count := Length(S);
  2145. SetLength(rs, Count);
  2146. if Count > 0 then
  2147. fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
  2148. Result := UTF8ToString(rs);
  2149. end;
  2150. function UTF8ToString(const S: array of Byte): UnicodeString;
  2151. var
  2152. rs: RawByteString;
  2153. Count: Integer;
  2154. begin
  2155. Count := Length(S);
  2156. SetLength(rs, Count);
  2157. if Count > 0 then
  2158. fpc_pchar_ansistr_intern_charmove(pansichar(@S),Low(S),rs,0,Count);
  2159. Result := UTF8ToString(rs);
  2160. end;
  2161. {$endif not CPUJVM}
  2162. Function LocaleNameToCodePage(const localename : shortstring; out codepage : TSystemCodePage) : Boolean;
  2163. begin
  2164. Result:=(localename='UTF-8') or (localename='UTF8');
  2165. if Result then
  2166. CodePage:=CP_UTF8
  2167. else
  2168. begin
  2169. Result:=(localename='UTF-7') or (localename='UTF7');
  2170. if Result then
  2171. CodePage:=CP_UTF7
  2172. else
  2173. begin
  2174. Result:=Assigned(LocaleNameToCodePageCallBack);
  2175. If Result then
  2176. LocaleNameToCodePageCallBack(LocaleName,CodePage,Result);
  2177. end;
  2178. end;
  2179. end;