ustrings.inc 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UTF-8 strings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$ifndef FPC_UNICODESTRING_TYPE_DEFINED}
  13. {$define FPC_UNICODESTRING_TYPE_DEFINED}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. and all things that are needed for it.
  17. UnicodeString is defined as a 'silent' punicodechar :
  18. a punicodechar that points to (S= SizeOf(SizeInt), R= (if CPU64 then SizeOf(Longint) else SizeOf(SizeInt))):
  19. @-S-R : Reference count (R bytes)
  20. @-S : SizeInt for size; size=number of chars. Multiply with
  21. sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  Type
    PUnicodeRec = ^TUnicodeRec;

    { Header record of a heap-allocated UnicodeString. NewUnicodeString
      fills it in and then advances the pointer by UnicodeFirstOff, so the
      string pointer handed out refers to the memory right after this record. }
    TUnicodeRec = Record
      CodePage    : TSystemCodePage;  { set to DefaultUnicodeCodePage on allocation }
      ElementSize : Word;             { set to SizeOf(UnicodeChar) on allocation }
  {$if not defined(VER3_0) and not defined(VER3_2)}
    {$ifdef CPU64}
      Ref         : Longint;          { reference count; negative marks a constant }
    {$else}
      Ref         : SizeInt;          { reference count; negative marks a constant }
    {$endif}
  {$else}
    {$ifdef CPU64}
      { align fields }
      Dummy       : DWord;
    {$endif CPU64}
      Ref         : SizeInt;          { reference count; negative marks a constant }
  {$endif}
      Len         : SizeInt;          { length in UnicodeChars }
    end;

  Const
    { Distance in bytes from the start of the header to the first character. }
    UnicodeFirstOff = SizeOf(TUnicodeRec);
  50. {$endif FPC_UNICODESTRING_TYPE_DEFINED}
  51. {
  52. Default UnicodeChar <-> AnsiChar conversion: UnicodeChar values below 256 map to the
  53. AnsiChar with the same ordinal value, all others are translated to '?'.
  54. These routines can be overridden for the Current Locale
  55. }
  56. {$ifndef FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  57. {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  {
    Fallback UTF-16 -> single-byte conversion.
    Resizes dest to len bytes, tags it with code page cp (without converting
    its contents) and copies each source code unit below 256 as the byte with
    the same ordinal value; every other code unit is replaced by '?'.
  }
  var
    idx  : SizeInt;
    outp : PAnsiChar;
    cu   : word;
  begin
    setlength(dest,len);
    { nothing to fill if the resulting string is empty }
    if pointer(dest)=nil then
      exit;
    SetCodePage(dest,cp,false);
    outp:=pointer(dest); {SetLength guarantees that dest is unique}
    idx:=0;
    while idx<len do
      begin
        cu:=word(source[idx]);
        if cu>=256 then
          outp[idx]:='?'
        else
          outp[idx]:=AnsiChar(cu);
        inc(idx);
      end;
  end;
  78. {$endif FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
  79. {$ifndef FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  80. {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  procedure DefaultAnsi2UnicodeMove(source:pansichar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  {
    Fallback single-byte -> UTF-16 conversion.
    Resizes dest to len characters and widens every source byte to the
    UnicodeChar with the same ordinal value. The cp argument is not used.
  }
  var
    idx  : SizeInt;
    outp : PUnicodeChar;
  begin
    setlength(dest,len);
    outp:=pointer(dest); {SetLength guarantees that dest is unique}
    for idx:=0 to len-1 do
      outp[idx]:=unicodechar(byte(source[idx]));
  end;
  95. {$endif FPC_HAS_DEFAULT_ANSI_2_UNICODE}
  96. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultCharLengthPChar(const Str: PAnsiChar): PtrInt;
  { Default implementation: reports length(Str) as the character count. }
  begin
    Result:=length(Str);
  end;
  function DefaultCodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
  {
    Default implementation: 0 when Str points at a #0 terminator, otherwise 1.
    MaxLookAead is not consulted.
  }
  begin
    if Str[0]=#0 then
      Result:=0
    else
      Result:=1;
  end;
  108. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  function DefaultGetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  {
    Returns DefaultFileSystemCodePage for scpFileSystemSingleByte and
    DefaultSystemCodePage for every other value of stdcp.
  }
  begin
    { don't raise an exception here. We need this for text file handling }
    if stdcp=scpFileSystemSingleByte then
      { we could return UTF-8 here in case of FPCRTL_FILESYSTEM_UTF8, but
        without a fully functional widestring manager that will probably cause
        more problems than it solves }
      Result:=DefaultFileSystemCodePage
    else
      Result:=DefaultSystemCodePage;
  end;
  Procedure GetUnicodeStringManager (Out Manager : TUnicodeStringManager);
  { Copies the currently installed string manager record into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Out Old: TUnicodeStringManager);
  { Installs New as the string manager and returns the previous one in Old. }
  begin
    { save the current manager first, then replace it }
    GetUnicodeStringManager(Old);
    widestringmanager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  { Installs New as the string manager; the previous manager is discarded. }
  begin
    widestringmanager:=New;
  end;
  Procedure GetWideStringManager (out Manager : TUnicodeStringManager);
  { Same as GetUnicodeStringManager: returns the installed string manager. }
  begin
    GetUnicodeStringManager(Manager);
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Out old: TUnicodeStringManager);
  {
    Same as the two-argument SetUnicodeStringManager: installs New and
    returns the previously installed manager in old.
  }
  begin
    SetUnicodeStringManager(New,old);
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  { Same as the one-argument SetUnicodeStringManager: installs New. }
  begin
    SetUnicodeStringManager(New);
  end;
  146. {****************************************************************************
  147. Internal functions, not in interface.
  148. ****************************************************************************}
  procedure UnicodeStringError;
  {
    Reports run-time error 204, passing the program counter and frame
    obtained inside this routine to the error handler.
  }
  begin
    HandleErrorAddrFrameInd(204, get_pc_addr, get_frame);
  end;
  153. {$ifndef FPC_HAS_NEW_UNICODESTRING}
  154. {$define FPC_HAS_NEW_UNICODESTRING}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocate a new UnicodeString on the heap with room for Len characters
    plus a terminating #0.
    The header is initialised with length Len, reference count 1,
    code page DefaultUnicodeCodePage and element size SizeOf(UnicodeChar).
    Returns a pointer to the first character (just past the header), where
    a #0 is stored. If GetMem yields nil, UnicodeStringError is called and
    nil is returned.
  }
  Var
    Block  : Pointer;
    Header : PUnicodeRec;
  begin
    GetMem(Block,Len*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
    if Block=Nil then
      begin
        UnicodeStringError;
        Result:=Nil;
        exit;
      end;
    Header:=PUnicodeRec(Block);
    Header^.Len:=Len;                              { Initial length }
    Header^.Ref:=1;                                { Initial Refcount }
    Header^.CodePage:=DefaultUnicodeCodePage;
    Header^.ElementSize:=SizeOf(UnicodeChar);
    Result:=Block+UnicodeFirstOff;                 { Points to string now }
    PUnicodeChar(Result)^:=#0;                     { Terminating #0 }
  end;
  177. {$endif FPC_HAS_NEW_UNICODESTRING}
  178. {$ifndef FPC_HAS_UNICODESTR_DECR_REF}
  179. {$define FPC_HAS_UNICODESTR_DECR_REF}
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Releases one reference to the unicodestring S and sets S to nil.
    Strings with a negative reference count (constants) are left untouched;
    otherwise the count is decremented and the memory is freed once it
    reaches zero.
  }
  Var
    Header : PUnicodeRec;
  Begin
    { Zero string }
    if S<>Nil then
      begin
        Header:=PUnicodeRec(S-UnicodeFirstOff);
        { clear the caller's variable before touching the counter }
        S:=nil;
        { constant strings carry a negative count and are never released }
        if Header^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(Header^.Ref) then
            FreeMem(Header);
      end;
  end;
  200. { alias for internal use }
  201. Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  202. {$endif FPC_HAS_UNICODESTR_DECR_REF}
  203. {$ifndef FPC_HAS_UNICODESTR_INCR_REF}
  204. {$define FPC_HAS_UNICODESTR_INCR_REF}
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S. Nil strings and strings with
    a negative reference count (constants) are ignored.
  }
  Var
    Header : PUnicodeRec;
  Begin
    if S=Nil then
      exit;
    Header:=PUnicodeRec(S-UnicodeFirstOff);
    { constant string ? then leave the counter alone }
    if Header^.Ref>=0 then
      inclocked(Header^.Ref);
  end;
  214. { alias for internal use }
  215. Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  216. {$endif FPC_HAS_UNICODESTR_INCR_REF}
  217. {$ifndef FPC_HAS_UNICODESTR_TO_SHORTSTR}
  218. {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString using DefaultSystemCodePage.
    At most high(res) UnicodeChars of S2 are handed to the string manager;
    an empty S2 yields an empty res.
  }
  Var
    Count     : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    { never convert more characters than res can hold }
    if Count>high(res) then
      Count:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),Converted,DefaultSystemCodePage,Count);
    res:=Converted;
  end;
  237. {$endif FPC_HAS_UNICODESTR_TO_SHORTSTR}
  238. {$ifndef FPC_HAS_SHORTSTR_TO_UNICODESTR}
  239. {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString, interpreting its bytes in
    DefaultSystemCodePage. An empty S2 yields an empty result.
  }
  Var
    Count : SizeInt;
  begin
    Result:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(@S2[1]),DefaultSystemCodePage,Result,Count);
  end;
  252. {$endif FPC_HAS_SHORTSTR_TO_UNICODESTR}
  253. {$ifndef FPC_HAS_UNICODESTR_TO_ANSISTR}
  254. {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString in code page cp (after
    TranslatePlaceholderCP). Without FPC_HAS_CPSTRING the code page is
    DefaultSystemCodePage. An empty S2 yields an empty result.
  }
  Var
    Count : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
    Result:='';
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    Count:=Length(S2);
    if Count<=0 then
      exit;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),Result,TranslatePlaceholderCP(cp),Count);
  end;
  276. {$endif FPC_HAS_UNICODESTR_TO_ANSISTR}
  277. {$ifndef FPC_HAS_ANSISTR_TO_UNICODESTR}
  278. {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString, using the code page stored
    in S2 (after TranslatePlaceholderCP). An empty S2 yields an empty result.
  }
  Var
    Count : SizeInt;
  begin
    Result:='';
    Count:=Length(S2);
    if Count<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(S2),TranslatePlaceholderCP(StringCodePage(S2)),Result,Count);
  end;
  295. {$endif FPC_HAS_ANSISTR_TO_UNICODESTR}
  296. {$ifndef FPC_HAS_UNICODESTR_TO_WIDESTR}
  297. {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of S2 unchanged into a WideString of equal length. }
  var
    Count : SizeInt;
  begin
    Count:=Length(S2);
    SetLength(Result,Count);
    Move(pointer(S2)^,Pointer(Result)^,Count*sizeof(WideChar));
  end;
  303. {$endif FPC_HAS_UNICODESTR_TO_WIDESTR}
  304. {$ifndef FPC_HAS_WIDESTR_TO_UNICODESTR}
  305. {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of S2 unchanged into a UnicodeString of equal length. }
  var
    Count : SizeInt;
  begin
    Count:=Length(S2);
    SetLength(Result,Count);
    Move(pointer(S2)^,Pointer(Result)^,Count*sizeof(WideChar));
  end;
  311. {$endif FPC_HAS_WIDESTR_TO_UNICODESTR}
  312. {$ifndef FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  313. {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Builds a UnicodeString from the zero-terminated wide string at p.
    A nil p yields an empty string.
  }
  var
    Count : SizeInt;
  begin
    Result:='';
    if p<>nil then
      begin
        { index of the first zero word = number of characters }
        Count:=IndexWord(p^, -1, 0);
        Setlength(Result,Count);
        if Count>0 then
          Move(p^,PUnicodeChar(Pointer(Result))^,Count*sizeof(UnicodeChar));
      end;
  end;
  326. {$endif FPC_HAS_PWIDECHAR_TO_UNICODESTR}
  327. {$ifndef FPC_HAS_PWIDECHAR_TO_ANSISTR}
  328. {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
  Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  {
    Converts the zero-terminated wide string at p to an AnsiString in code
    page cp (after TranslatePlaceholderCP). Without FPC_HAS_CPSTRING the code
    page is DefaultSystemCodePage. A nil or empty input yields ''.
  }
  var
    Count : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
    Result:='';
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    if p=nil then
      exit;
    { index of the first zero word = number of characters }
    Count:=IndexWord(p^, -1, 0);
    if Count<=0 then
      exit;
    widestringmanager.Wide2AnsiMoveProc(P,Result,TranslatePlaceholderCP(cp),Count);
  end;
  349. {$endif FPC_HAS_PWIDECHAR_TO_ANSISTR}
  350. {$ifndef FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  351. {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  {
    Converts the zero-terminated wide string at p to a ShortString using
    DefaultSystemCodePage. A nil or empty input yields ''.
  }
  var
    Count     : SizeInt;
    Converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        { index of the first zero word = number of characters }
        Count:=IndexWord(p^, high(PtrInt), 0);
        if Count>0 then
          begin
            widestringmanager.Wide2AnsiMoveProc(p,Converted,DefaultSystemCodePage,Count);
            res:=Converted;
          end;
      end;
  end;
  367. {$endif FPC_HAS_PWIDECHAR_TO_SHORTSTR}
  368. {$ifndef FPC_HAS_UNICODESTR_ASSIGN}
  369. {$define FPC_UNICODESTR_ASSIGN}
  370. { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Assigns S2 to S1 (S1:=S2), taking in account reference counts:
    the count of S2 is raised when it is positive, the old S1 is released,
    and S1 is then made to point at S2.
  }
  Var
    Source : PUnicodeRec;
  begin
    if S2<>nil then
      begin
        Source:=PUnicodeRec(S2-UnicodeFirstOff);
        if Source^.Ref>0 then
          inclocked(Source^.ref);
      end;
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    S1:=S2;
  end;
  383. { alias for internal use }
  384. Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  385. {$endif FPC_UNICODESTR_ASSIGN}
  386. {$ifndef FPC_HAS_UNICODESTR_CONCAT}
  387. {$define FPC_HAS_UNICODESTR_CONCAT}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    DestS:=S1+S2. DestS may be the same string as S1 and/or S2; those cases
    are handled by resizing DestS and moving the characters in place.
  }
  Var
    Len1,Len2  : SizeInt;
    Tail       : Pointer;
    SelfAppend : boolean;
  begin
    { only assign if s1 or s2 is empty }
    if Length(S1)=0 then
      begin
        DestS:=s2;
        exit;
      end;
    if Length(S2)=0 then
      begin
        DestS:=s1;
        exit;
      end;
    Len1:=Length(S1);
    Len2:=Length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS:=DestS+S2; remember beforehand whether S2 is DestS as well }
        SelfAppend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,Len2+Len1);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        if SelfAppend then
          { read from the resized DestS; only the characters are copied }
          Move(Pointer(DestS)^,Tail^,Len2*sizeof(UnicodeChar))
        else
          { Len2+1: copy the terminating #0 of S2 too }
          Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS:=S1+DestS; shift the old contents up, then put S1 in front }
        SetLength(DestS,Len2+Len1);
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        Move(Pointer(DestS)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
      end
    else
      begin
        { DestS is distinct from both operands: start from an empty string }
        DestS:='';
        SetLength(DestS,Len2+Len1);
        Move(Pointer(S1)^,Pointer(DestS)^,Len1*sizeof(UnicodeChar));
        Tail:=Pointer(DestS)+Len1*sizeof(UnicodeChar);
        Move(Pointer(S2)^,Tail^,(Len2+1)*sizeof(UnicodeChar));
      end;
  end;
  430. {$endif FPC_HAS_UNICODESTR_CONCAT}
  431. {$ifndef FPC_HAS_UNICODESTR_CONCAT_MULTI}
  432. {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    DestS:=sarr[low]+...+sarr[high], with a single SetLength call.
    When DestS is the first non-empty element (and appears nowhere else in
    sarr) the remaining elements are appended to it in place.
  }
  Var
    Idx                 : SizeInt;
    Src,Dst             : pointer;
    PartLen,TotalLen    : SizeInt;
    First,FirstNonEmpty : SizeInt;
    KeepAlive           : pointer;
    KeptLen             : SizeInt;
  begin
    First:=low(sarr);
    { skip empty strings }
    while (First<=high(sarr)) and (sarr[First]='') do
      inc(First);
    if First>high(sarr) then
      begin
        DestS:='';                { All source strings empty }
        exit;
      end;
    { Calculate size of the result so we can do
      a single call to SetLength() }
    TotalLen:=0;
    for Idx:=First to high(sarr) do
      TotalLen:=TotalLen+length(sarr[Idx]);
    { In the case of the only nonempty string, return it directly. }
    if TotalLen=length(sarr[First]) then
      begin
        DestS:=sarr[First];
        exit;
      end;
    KeepAlive:=nil;
    FirstNonEmpty:=First;
    { DestS is the first operand: keep its contents and append the rest }
    if Pointer(DestS)=Pointer(sarr[First]) then
      inc(First);
    { Check for another reuse, then we can't use
      the append optimization }
    for Idx:=First to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[Idx]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. Therefor
              this optimization is disabled for WINLIKEUNICODESTRING }
            KeepAlive:=pointer(DestS);
            fpc_UnicodeStr_Incr_Ref(KeepAlive);
            First:=FirstNonEmpty;
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if First=FirstNonEmpty then
      DestS:='';
    KeptLen:=length(DestS);
    SetLength(DestS,TotalLen);
    { Concat all strings, except the string we already
      copied in DestS }
    Dst:=Pointer(DestS)+KeptLen*sizeof(UnicodeChar);
    for Idx:=First to high(sarr) do
      begin
        Src:=pointer(sarr[Idx]);
        PartLen:=length(unicodestring(Src));
        Move(Src^,Dst^,PartLen*sizeof(UnicodeChar));
        Dst:=Dst+PartLen*sizeof(UnicodeChar);
      end;
    { drop the extra reference taken above (no-op when KeepAlive is nil) }
    fpc_UnicodeStr_Decr_Ref(KeepAlive);
  end;
  501. {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
  502. {$ifndef FPC_HAS_CHAR_TO_UCHAR}
  503. {$define FPC_HAS_CHAR_TO_UCHAR}
  Function fpc_Char_To_UChar(const c : AnsiChar): UnicodeChar; compilerproc;
  {
    Converts an AnsiChar to a UnicodeChar via the installed string manager,
    interpreting c in DefaultSystemCodePage.
    Returns the first character of the converted string, or '?' when the
    manager produced an empty string (mirrors the fallback used by
    fpc_UChar_To_Char).
  }
  var
    w: unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,DefaultSystemCodePage,w,1);
    { guard the index: w[1] on an empty string would dereference nil }
    if length(w)>0 then
      fpc_Char_To_UChar:=w[1]
    else
      fpc_Char_To_UChar:='?';
  end;
  511. {$endif FPC_HAS_CHAR_TO_UCHAR}
  512. {$ifndef FPC_HAS_CHAR_TO_UNICODESTR}
  513. {$define FPC_HAS_CHAR_TO_UNICODESTR}
  Function fpc_Char_To_UnicodeStr(const c : AnsiChar): UnicodeString; compilerproc;
  {
    Converts a single AnsiChar, interpreted in DefaultSystemCodePage,
    to a UnicodeString via the installed string manager.
  }
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c, DefaultSystemCodePage, Result, 1);
  end;
  521. {$endif FPC_HAS_CHAR_TO_UNICODESTR}
  522. {$ifndef FPC_HAS_UCHAR_TO_CHAR}
  523. {$define FPC_HAS_UCHAR_TO_CHAR}
  Function fpc_UChar_To_Char(const c : UnicodeChar): AnsiChar; compilerproc;
  {
    Converts a UnicodeChar to an AnsiChar in DefaultSystemCodePage.
    Returns '?' unless the conversion yields exactly one AnsiChar.
  }
  var
    Converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    if length(Converted)<>1 then
      Result:='?'
    else
      Result:=Converted[1];
  end;
  537. {$endif FPC_HAS_UCHAR_TO_CHAR}
  538. {$ifndef FPC_HAS_UCHAR_TO_SHORTSTR}
  539. {$define FPC_HAS_UCHAR_TO_SHORTSTR}
  function fpc_UChar_To_ShortStr(const c : WideChar): shortstring; compilerproc;
  {
    Converts a single WideChar to a ShortString in DefaultSystemCodePage
    via the installed string manager.
  }
  var
    Converted : ansistring;
  begin
    widestringmanager.Wide2AnsiMoveProc(@c, Converted, DefaultSystemCodePage, 1);
    Result:=Converted;
  end;
  550. {$endif FPC_HAS_UCHAR_TO_SHORTSTR}
  551. {$ifndef FPC_HAS_UCHAR_TO_UNICODESTR}
  552. {$define FPC_HAS_UCHAR_TO_UNICODESTR}
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  { Returns a UnicodeString of length 1 whose only character is c. }
  begin
    SetLength(Result,1);
    Result[1]:=c;
  end;
  561. {$endif FPC_HAS_UCHAR_TO_UNICODESTR}
  562. {$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
  563. {$define FPC_HAS_UCHAR_TO_ANSISTR}
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
  {
    Converts a single UnicodeChar to an AnsiString in code page cp (after
    TranslatePlaceholderCP). Without FPC_HAS_CPSTRING the code page is
    DefaultSystemCodePage.
  }
  {$ifndef FPC_HAS_CPSTRING}
  var
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    widestringmanager.Unicode2AnsiMoveProc(@c, Result, TranslatePlaceholderCP(cp), 1);
  end;
  579. {$endif FPC_HAS_UCHAR_TO_ANSISTR}
  580. {$ifndef FPC_HAS_PCHAR_TO_UNICODESTR}
  581. {$define FPC_HAS_PCHAR_TO_UNICODESTR}
  Function fpc_PChar_To_UnicodeStr(const p : PAnsiChar): UnicodeString; compilerproc;
  {
    Converts the zero-terminated string at p, interpreted in
    DefaultSystemCodePage, to a UnicodeString. A nil p or an empty string
    yields ''.
  }
  Var
    Count : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { index of the terminating #0 = number of bytes to convert }
        Count:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,Result,Count);
      end
    else
      Result:='';
  end;
  594. {$endif FPC_HAS_PCHAR_TO_UNICODESTR}
  595. {$ifndef FPC_HAS_CHARARRAY_TO_UNICODESTR}
  596. {$define FPC_HAS_CHARARRAY_TO_UNICODESTR}
  Function fpc_CharArray_To_UnicodeStr(const arr: array of ansichar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts an array of AnsiChar, interpreted in DefaultSystemCodePage,
    to a UnicodeString. With zerobased set the array is treated as
    zero-terminated: conversion stops at the first #0 (the whole array is
    used when there is none). Otherwise the whole array is converted.
  }
  var
    Count : SizeInt;
  begin
    { default: the complete array }
    Count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            Result:='';
            exit;
          end;
        Count:=IndexChar(arr,high(arr)+1,#0);
        { no terminator found: fall back to the complete array }
        if Count=-1 then
          Count:=high(arr)+1;
      end;
    widestringmanager.Ansi2UnicodeMoveProc(pansichar(@arr),DefaultSystemCodePage,Result,Count);
  end;
  616. {$endif FPC_HAS_CHARARRAY_TO_UNICODESTR}
  617. {$ifndef FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  618. {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies an array of WideChar into a UnicodeString. With zerobased set
    the copy stops at the first zero word (the whole array is used when
    there is none). Otherwise the whole array is copied.
  }
  var
    Count : SizeInt;
  begin
    { default: the complete array }
    Count:=high(arr)+1;
    if zerobased then
      begin
        Count:=IndexWord(arr,high(arr)+1,0);
        { no terminator found: fall back to the complete array }
        if Count=-1 then
          Count:=high(arr)+1;
      end;
    SetLength(Result,Count);
    Move(arr[0],Pointer(Result)^,Count*sizeof(WideChar));
  end;
  634. {$endif FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
  635. {$ifndef FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  636. {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  637. { due to their names, the following procedures should be in wstrings.inc,
  638. however, the compiler generates code using this functions on all platforms }
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts an array of WideChar to a ShortString in DefaultSystemCodePage.
    At most high(res) array elements are considered; with zerobased set the
    input additionally ends at the first zero word within that range.
  }
  var
    Limit     : longint;
    ZeroPos   : ptrint;
    Count     : byte;
    Converted : ansistring;
  begin
    { clamp the number of elements to 0..high(res) }
    Limit:=high(arr)+1;
    if Limit>high(res) then
      Limit:=high(res)
    else if Limit<0 then
      Limit:=0;
    Count:=Limit;
    if zerobased then
      begin
        ZeroPos:=IndexWord(arr[0],Limit,0);
        { a terminator inside the range shortens the input }
        if ZeroPos>=0 then
          Count:=ZeroPos;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),Converted,DefaultSystemCodePage,Count);
    res:=Converted;
  end;
  664. {$endif FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
  665. {$ifndef FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  666. {$define FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; {$ifdef FPC_HAS_CPSTRING}cp : TSystemCodePage;{$endif FPC_HAS_CPSTRING} zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts an open array of WideChar to an AnsiString in code page cp
    (DefaultSystemCodePage when code page strings are not available).
    With zerobased set, conversion stops at the first zero word.
    An empty input yields an empty string without calling the manager.
  }
  var
    count,nulpos : SizeInt;
  {$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
  begin
  {$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
  {$endif FPC_HAS_CPSTRING}
    count:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,count,0);
        if nulpos<>-1 then
          count:=nulpos;
      end;
    if count<=0 then
      begin
        fpc_WideCharArray_To_AnsiStr:='';
        exit;
      end;
    cp:=TranslatePlaceholderCP(cp);
    widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),RawByteString(fpc_WideCharArray_To_AnsiStr),cp,count);
  end;
  693. {$endif FPC_HAS_WIDECHARARRAY_TO_ANSISTR}
  694. {$ifndef FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  695. {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  {
    Copies an open array of WideChar into a WideString.
    With zerobased set, only the characters before the first zero word are
    copied (all of them when no zero word is found).
  }
  var
    count,nulpos : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        nulpos:=IndexWord(arr,count,0);
        if nulpos<>-1 then
          count:=nulpos;
      end;
    SetLength(fpc_WideCharArray_To_WideStr,count);
    Move(arr[0],Pointer(fpc_WideCharArray_To_WideStr)^,count*sizeof(WideChar));
  end;
  711. {$endif FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
  712. {$ifndef FPC_HAS_UNICODESTR_TO_CHARARRAY}
  713. {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
  procedure fpc_unicodestr_to_chararray(out res: array of AnsiChar; const src: UnicodeString); compilerproc;
  {
    Converts src to DefaultSystemCodePage and stores as many bytes as fit
    into res; the remainder of res is filled with zero bytes.
  }
  var
    copied,capacity: SizeInt;
    converted: ansistring;
  begin
    { only take the address of src[1] when src actually has characters }
    if length(src)>0 then
      widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,DefaultSystemCodePage,length(src));
    capacity:=length(res);
    copied:=length(converted);
    if copied>capacity then
      copied:=capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],copied);
    fillchar(res[copied],capacity-copied,0);
  {$pop}
  end;
  {$endif FPC_HAS_UNICODESTR_TO_CHARARRAY}
  733. {$ifndef FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  734. {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  {
    Converts src (interpreted in its own code page, with placeholder code
    pages translated) to wide characters and stores as many as fit into res;
    the remainder of res is zero-filled.
  }
  var
    copied,capacity: SizeInt;
    converted: widestring;
  begin
    { only take the address of src[1] when src actually has characters }
    if length(src)>0 then
      widestringmanager.ansi2widemoveproc(pansichar(@src[1]),TranslatePlaceholderCP(StringCodePage(src)),converted,length(src));
    capacity:=length(res);
    copied:=length(converted);
    if copied>capacity then
      copied:=capacity;
  {$push}
  {$r-}
    move(converted[1],res[0],copied*sizeof(widechar));
    fillchar(res[copied],(capacity-copied)*SizeOf(WideChar),0);
  {$pop}
  end;
  753. {$endif FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
  754. {$ifndef FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  755. {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  {
    Converts src from DefaultSystemCodePage to wide characters and stores as
    many as fit into res; the remainder of res is zero-filled.
  }
  var
    copied: longint;
    converted : widestring;
  begin
    copied:=length(src);
    { do not touch character 1 of an empty shortstring }
    if copied>0 then
      widestringmanager.ansi2widemoveproc(pansichar(@src[1]),DefaultSystemCodePage,converted,copied);
    copied:=length(converted);
    if length(res)<copied then
      copied:=length(res);
  {$push}
  {$r-}
    move(converted[1],res[0],copied*sizeof(widechar));
    fillchar(res[copied],(length(res)-copied)*SizeOf(WideChar),0);
  {$pop}
  end;
  774. {$endif FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
  775. {$ifndef FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  776. {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  {
    Copies as many characters of src as fit into res and zero-fills the
    remainder of res.
  }
  var
    copied,capacity: SizeInt;
  begin
    capacity:=length(res);
    copied:=length(src);
    if copied>capacity then
      copied:=capacity;
  {$push}
  {$r-}
    { src[1] may only be accessed when there is something to copy }
    if copied>0 then
      move(src[1],res[0],copied*SizeOf(WideChar));
    fillchar(res[copied],(capacity-copied)*SizeOf(WideChar),0);
  {$pop}
  end;
  792. {$endif FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
  793. {$ifndef FPC_HAS_UNICODESTR_COMPARE}
  794. {$define FPC_HAS_UNICODESTR_COMPARE}
  Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  {
    Three-way comparison of two UnicodeStrings.
    Returns
     <0 if S1<S2
      0 if S1=S2
     >0 if S1>S2
    The common prefix is compared word by word; when it is equal the
    difference of the lengths decides.
  }
  Var
    common,outcome : SizeInt;
  begin
    { identical pointers (including both nil) are trivially equal }
    if pointer(S1)=pointer(S2) then
      exit(0);
    common:=Length(S1);
    if Length(S2)<common then
      common:=Length(S2);
    outcome:=CompareWord(S1[1],S2[1],common);
    if outcome=0 then
      outcome:=Length(S1)-Length(S2);
    fpc_UnicodeStr_Compare:=outcome;
  end;
  820. {$endif FPC_HAS_UNICODESTR_COMPARE}
  821. {$ifndef FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  822. {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  {
    Equality-only comparison of two UnicodeStrings.
    Returns
      0   if S1=S2
      <>0 if S1<>S2
    Strings of different length are reported as -1 without looking at
    their contents.
  }
  Var
    len : SizeInt;
  begin
    if pointer(S1)=pointer(S2) then
      exit(0);
    len:=Length(S1);
    if len=Length(S2) then
      fpc_UnicodeStr_Compare_Equal:=CompareWord(S1[1],S2[1],len)
    else
      fpc_UnicodeStr_Compare_Equal:=-1;
  end;
  841. {$endif FPC_HAS_UNICODESTR_COMPARE_EQUAL}
  842. {$ifndef FPC_HAS_UNICODESTR_RANGECHECK}
  843. {$define FPC_HAS_UNICODESTR_RANGECHECK}
  Procedure fpc_UnicodeStr_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  {
    Range check for one-based indexing: reports error 201 when p is nil or
    index lies outside 1..len of the string record preceding p.
  }
  begin
    if (p=nil) or (index<1) or (index>PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  Procedure fpc_UnicodeStr_ZeroBased_RangeCheck(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_UNICODESTR_ZEROBASED_RANGECHECK']; compilerproc;
  {
    Range check for zero-based indexing: reports error 201 when p is nil or
    index lies outside 0..len-1 of the string record preceding p.
  }
  begin
    if (p=nil) or (index<0) or (index>=PUnicodeRec(p-UnicodeFirstOff)^.len) then
      HandleErrorAddrFrameInd(201,get_pc_addr,get_frame);
  end;
  854. {$endif FPC_HAS_UNICODESTR_RANGECHECK}
  855. {$ifndef FPC_HAS_UNICODESTR_SETLENGTH}
  856. {$define FPC_HAS_UNICODESTR_SETLENGTH}
  Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  {
    Sets the length of string S to l.
    Makes sure S is unique and has enough room; l<=0 releases the string.
  }
  Var
    block : Pointer;
    copychars : SizeInt;
    newlen,cursize,needsize : SizeUInt;
  begin
    newlen:=l;
    if l<=0 then
      begin
        { length=0, deallocate the string }
        fpc_unicodestr_decr_ref(Pointer(S));
        exit;
      end;
    if Pointer(S)=nil then
      { no string yet: allocate a completely new one }
      Pointer(S):=NewUnicodeString(newlen)
    else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
      begin
        { sole owner: resize in place, but only when the block is too small
          or at least twice as large as needed (and larger than 32 bytes) }
        block:=Pointer(S)-UnicodeFirstOff;
        cursize:=MemSize(block);
        needsize:=SizeUInt(l*sizeof(UnicodeChar)+(UnicodeFirstOff+sizeof(UnicodeChar)));
        if (needsize>cursize) or ((cursize>32) and (needsize<=(cursize div 2))) then
          begin
            reallocmem(block,needsize);
            Pointer(S):=block+UnicodeFirstOff;
          end;
      end
    else
      begin
        { reference count is not 1: build a private copy of the required size }
        block:=NewUnicodeString(newlen);
        if Length(S)>0 then
          begin
            { copy at most l characters; when everything fits, the
              terminating null is moved along }
            copychars:=succ(length(S));
            if l<copychars then
              copychars:=l;
            Move(Pointer(S)^,block^,copychars*Sizeof(UnicodeChar));
          end;
        fpc_unicodestr_decr_ref(Pointer(S));
        Pointer(S):=block;
      end;
    { force nil termination in case the string got shorter }
    PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
    PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=newlen;
  end;
  910. {$endif FPC_HAS_UNICODESTR_SETLENGTH}
  911. {*****************************************************************************
  912. Public functions, In interface.
  913. *****************************************************************************}
  function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  { Builds a UnicodeString from the PUnicodeChar S; the character count is
    taken from the length of S converted to a UnicodeString. }
  var
    chars : SizeInt;
  begin
    chars:=Length(UnicodeString(S));
    result:=UnicodeCharLenToString(S,chars);
  end;
  918. {$ifndef FPC_HAS_STRING_TO_UNICODECHAR}
  919. {$define FPC_HAS_STRING_TO_UNICODECHAR}
  function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  { Thin forwarder: delegates to StringToWideChar with the same arguments
    and returns its result. }
  begin
    StringToUnicodeChar:=StringToWideChar(Src,Dest,DestSize);
  end;
  924. {$endif FPC_HAS_STRING_TO_UNICODECHAR}
  function WideCharToString(S : PWideChar) : UnicodeString;
  { Builds a UnicodeString from the PWideChar S; the character count is
    taken from the length of S converted to a WideString. }
  var
    chars : SizeInt;
  begin
    chars:=Length(WideString(S));
    result:=WideCharLenToString(S,chars);
  end;
  929. {$ifndef FPC_HAS_STRING_LEN_TO_WIDECHAR}
  930. {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
  function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  {
    Converts Src (interpreted in its own code page) to wide characters and
    stores at most DestSize-1 of them in Dest, followed by a #0 terminator.
    Returns Dest.
    When Dest is nil or DestSize<=0 there is no room for even the
    terminator, so nothing is written and Dest is returned unchanged.
  }
  var
    temp: widestring;
    Len: SizeInt;
  begin
    result:=Dest;
    { without this guard Len would become negative below and the terminator
      would be stored in front of the buffer }
    if (Dest=nil) or (DestSize<=0) then
      exit;
    widestringmanager.Ansi2WideMoveProc(PAnsiChar(Src),StringCodePage(Src),temp,Length(Src));
    Len:=Length(temp);
    { leave room for the terminator }
    if DestSize<=Len then
      Len:=DestSize-1;
    if Len>0 then
      move(temp[1],Dest^,Len*SizeOf(WideChar));
    Dest[Len]:=#0;
  end;
  944. {$endif FPC_HAS_STRING_LEN_TO_WIDECHAR}
  945. {$ifndef FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  946. {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len characters found at S. }
  begin
    SetLength(UnicodeCharLenToString,Len);
    Move(S^,Pointer(UnicodeCharLenToString)^,Len*2);
  end;
  952. {$endif FPC_HAS_UNICODECHAR_LEN_TO_STRING}
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  { Procedure form of UnicodeCharLenToString: stores the first Len
    characters at Src in Dest. }
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharLenToString(Src,Len);
    Dest:=converted;
  end;
  procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the first Len characters at Src in Dest, converted from
    UnicodeString to AnsiString. }
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharLenToString(Src,Len);
    Dest:=AnsiString(converted);
  end;
  procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Stores the result of UnicodeCharToString(S) in Dest, converted to
    AnsiString. }
  var
    converted : UnicodeString;
  begin
    converted:=UnicodeCharToString(S);
    Dest:=AnsiString(converted);
  end;
  965. {$ifndef FPC_HAS_WIDECHAR_LEN_TO_STRING}
  966. {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
  function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  { Returns a UnicodeString holding the first Len wide characters found at S. }
  begin
    SetLength(WideCharLenToString,Len);
    Move(S^,Pointer(WideCharLenToString)^,Len*2);
  end;
  972. {$endif FPC_HAS_WIDECHAR_LEN_TO_STRING}
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  { Procedure form of WideCharLenToString: stores the first Len characters
    at Src in Dest. }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharLenToString(Src,Len);
    Dest:=converted;
  end;
  procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  { Stores the first Len characters at Src in Dest, converted from
    UnicodeString to AnsiString. }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharLenToString(Src,Len);
    Dest:=AnsiString(converted);
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  { Procedure form of WideCharToString: stores its result for S in Dest. }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharToString(S);
    Dest:=converted;
  end;
  procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { Stores the result of WideCharToString(S) in Dest, converted to
    AnsiString. }
  var
    converted : UnicodeString;
  begin
    converted:=WideCharToString(S);
    Dest:=AnsiString(converted);
  end;
  { Typed import of the routine exported as 'FPC_UNICODESTR_UNIQUE', so that
    it can be called with a UnicodeString variable. }
  Function fpc_unicodestr_Unique_func(Var S : UnicodeString): Pointer; external name 'FPC_UNICODESTR_UNIQUE';

  Procedure UniqueString (Var S : UnicodeString);{$ifdef SYSTEMINLINE}inline;{$endif}
  { Calls the 'FPC_UNICODESTR_UNIQUE' routine on S; the returned pointer is
    not used. }
  begin
    fpc_unicodestr_Unique_func(S);
  end;
  994. {$ifndef FPC_HAS_UNICODESTR_UNIQUE}
  995. {$define FPC_HAS_UNICODESTR_UNIQUE}
  Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  {
    Makes sure the reference count of S is 1. When it is not, the contents
    (including the terminating null) are copied into a freshly allocated
    string, the old reference is released and S is redirected to the copy.
    Returns the resulting value of S; nil is returned unchanged.
  }
  Var
    fresh : Pointer;
    len : SizeInt;
  begin
    fpc_unicodestr_Unique:=S;
    if S=nil then
      exit;
    if PUnicodeRec(S-UnicodeFirstOff)^.Ref=1 then
      exit;
    len:=PUnicodeRec(S-UnicodeFirstOff)^.len;
    fresh:=NewUnicodeString(len);
    Move(PUnicodeChar(S)^,fresh^,(len+1)*sizeof(UnicodeChar));
    PUnicodeRec(fresh-UnicodeFirstOff)^.len:=len;
    fpc_unicodestr_decr_ref(S);
    S:=fresh;
    fpc_unicodestr_Unique:=fresh;
  end;
  1019. {$endif FPC_HAS_UNICODESTR_UNIQUE}
  1020. {$ifndef FPC_HAS_UNICODESTR_COPY}
  1021. {$define FPC_HAS_UNICODESTR_COPY}
  Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  {
    Returns the substring of S that starts at the one-based position Index
    and is at most Size characters long. An Index below 1 is treated as 1.
    The result is empty when Size<=0 or when Index lies beyond the end of S.
  }
  var
    ResultAddress : Pointer;
    Avail : SizeInt;
  begin
    ResultAddress:=Nil;
    dec(index);
    if Index < 0 then
      Index := 0;
    { Clamp Size to the number of characters available from Index on.
      Comparing against Length(S)-Index instead of evaluating Index+Size
      avoids an overflow when either value is close to high(SizeInt), which
      could otherwise let an out-of-range Index slip through. Avail is <=0
      when Index is at or past the end, which yields an empty result. }
    Avail:=Length(S)-Index;
    if Size>Avail then
      Size:=Avail;
    If Size>0 then
      begin
        ResultAddress:=NewUnicodeString(Size);
        Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
        PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size;
        PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
      end;
    { release whatever the result variable held before installing the new value }
    fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
    Pointer(fpc_unicodestr_Copy):=ResultAddress;
  end;
  1045. {$endif FPC_HAS_UNICODESTR_COPY}
  1046. {$ifndef FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1047. {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
  Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  {
    Returns the one-based position of the first occurrence of Substr in
    Source at or after Offset, or 0 when there is none. An empty Substr or
    an Offset outside 1..Length(Source) also yields 0.
  }
  var
    cur,last,srclen,sublen,hit : SizeInt;
  begin
    Pos:=0;
    srclen:=Length(Source);
    sublen:=Length(Substr);
    if (sublen<=0) or (Offset<=0) or (Offset>srclen) then
      exit;
    { last position at which Substr can still start }
    last:=srclen-sublen+1;
    cur:=Offset;
    while cur<=last do
      begin
        { jump to the next candidate: an occurrence of the first character }
        hit:=IndexWord(Source[cur],last-cur+1,word(Substr[1]));
        if hit<0 then
          exit;
        inc(cur,hit);
        if CompareWord(Substr[1],Source[cur],sublen)=0 then
          exit(cur);
        inc(cur);
      end;
  end;
  1070. {$endif FPC_HAS_POS_UNICODESTR_UNICODESTR}
  1071. {$ifndef FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1072. {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1073. { Faster version for a unicodechar alone }
  Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Returns the one-based position of the first occurrence of c in s at or
    after Offset, or 0 when c is not found or Offset is outside
    1..Length(s). }
  var
    len,hit: SizeInt;
  begin
    pos:=0;
    len:=length(s);
    if (Offset<=0) or (Offset>len) then
      exit;
    hit:=IndexWord(s[Offset],len-Offset+1,word(c));
    if hit>=0 then
      pos:=Offset+hit;
  end;
  1087. {$endif FPC_HAS_POS_UNICODECHAR_UNICODESTR}
  1088. { DO NOT inline these! Inlining a managed typecast creates an implicit try..finally
  1089. block, which is significant bloat without any sensible speed improvement. }
  Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts c to UnicodeString and forwards to the
    UnicodeString/UnicodeString search. }
  var
    needle : UnicodeString;
  begin
    needle:=UnicodeString(c);
    result:=Pos(needle,s,Offset);
  end;
  Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Converts c to UnicodeString and forwards to the
    UnicodeString/UnicodeString search. }
  var
    needle : UnicodeString;
  begin
    needle:=UnicodeString(c);
    result:=Pos(needle,s,Offset);
  end;
  Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  { Converts the searched string s to UnicodeString and forwards to the
    UnicodeString/UnicodeString search. }
  var
    haystack : UnicodeString;
  begin
    haystack:=UnicodeString(s);
    result:=Pos(c,haystack,Offset);
  end;
  1102. {$ifndef FPC_HAS_UNICODESTR_OF_CHAR}
  1103. {$define FPC_HAS_UNICODESTR_OF_CHAR}
  Function StringOfChar(c : Unicodechar;l : SizeInt) : UnicodeString;
  { Returns a string of length l in which every character is c. }
  begin
    SetLength(Result,l);
    { use the actual length, which is what SetLength produced for l }
    FillWord(Pointer(Result)^,Length(Result),word(c));
  end;
  1109. {$endif}
  1110. {$ifndef FPC_HAS_POS_CHAR_UNICODESTR}
  1111. {$define FPC_HAS_POS_CHAR_UNICODESTR}
  1112. { Faster version for a AnsiChar alone. Must be implemented because }
  1113. { pos(c: AnsiChar; const s: shortstring) also exists, so otherwise }
  1114. { using pos(AnsiChar,pansichar) will always call the shortstring version }
  1115. { (exact match for first argument), also with $h+ (JM) }
  Function Pos (c : AnsiChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { Returns the one-based position of the first occurrence of c (converted
    to a UnicodeChar) in s at or after Offset, or 0 when it is not found or
    Offset is outside 1..Length(s). }
  var
    len,hit: SizeInt;
  begin
    pos:=0;
    len:=length(s);
    if (Offset<=0) or (Offset>len) then
      exit;
    hit:=IndexWord(s[Offset],len-Offset+1,word(unicodechar(c)));
    if hit>=0 then
      pos:=Offset+hit;
  end;
  1129. {$endif FPC_HAS_POS_CHAR_UNICODESTR}
  1130. {$ifndef FPC_HAS_DELETE_UNICODESTR}
  1131. {$define FPC_HAS_DELETE_UNICODESTR}
  Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif}(Var S : UnicodeString; Index,Size: SizeInt);
  {
    Removes up to Size characters from S, starting at the one-based position
    Index. Nothing happens when Index is outside 1..Length(S) or Size<=0.
  }
  Var
    len : SizeInt;
  begin
    len:=Length(S);
    if (Index>len) or (Index<=0) or (Size<=0) then
      exit;
    UniqueString (S);
    { compare against len-Index: (Size+Index) would overflow if Size=MaxInt }
    if Size>len-Index then
      { the deletion reaches the end of the string: just truncate }
      Size:=len-Index+1
    else
      begin
        { shift the tail (including the terminating null) down }
        Dec(Index);
        Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(len-Index-Size+1)*sizeof(UnicodeChar));
      end;
    Setlength(S,len-Size);
  end;
  1150. {$endif FPC_HAS_DELETE_UNICODESTR}
  1151. {$ifndef FPC_HAS_INSERT_UNICODESTR}
  1152. {$define FPC_HAS_INSERT_UNICODESTR}
  Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif}(Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  {
    Inserts Source into S in front of the one-based position Index.
    Index is clamped to 1..Length(S)+1; an empty Source leaves S untouched.
  }
  var
    Combined : UnicodeString;
    oldlen,addlen : SizeInt;
  begin
    addlen:=Length(Source);
    if addlen=0 then
      exit;
    if Index<=0 then
      Index:=1;
    oldlen:=Length(S);
    if Index>oldlen then
      Index:=oldlen+1;
    { from here on Index is the zero-based insertion offset }
    Dec(Index);
    SetLength(Combined,addlen+oldlen);
    { head of S }
    if Index>0 then
      Move(PUnicodeChar(S)^,PUnicodeChar(Combined)^,Index*sizeof(UnicodeChar));
    { inserted text }
    Move(PUnicodeChar(Source)^,PUnicodeChar(Combined)[Index],addlen*sizeof(UnicodeChar));
    { tail of S }
    if (oldlen-Index)>0 then
      Move(PUnicodeChar(S)[Index],PUnicodeChar(Combined)[addlen+Index],(oldlen-Index)*sizeof(UnicodeChar));
    S:=Combined;
  end;
  1174. {$endif FPC_HAS_INSERT_UNICODESTR}
  1175. {$ifndef FPC_HAS_UPCASE_UNICODECHAR}
  1176. {$define FPC_HAS_UPCASE_UNICODECHAR}
  Function UpCase(c:UnicodeChar):UnicodeChar;
  { Passes c as a one-character string to the widestring manager's
    UpperUnicodeStringProc and returns the first character of its result. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(UnicodeString(c))[1];
  end;
  1181. {$endif FPC_HAS_UPCASE_UNICODECHAR}
  1182. {$ifndef FPC_HAS_UPCASE_UNICODESTR}
  1183. {$define FPC_HAS_UPCASE_UNICODESTR}
  function UpCase(const s : UnicodeString) : UnicodeString;
  { Returns s as processed by the widestring manager's
    UpperUnicodeStringProc. }
  begin
    UpCase:=widestringmanager.UpperUnicodeStringProc(s);
  end;
  1188. {$endif FPC_HAS_UPCASE_UNICODESTR}
  1189. {$ifndef FPC_HAS_LOWERCASE_UNICODECHAR}
  1190. {$define FPC_HAS_LOWERCASE_UNICODECHAR}
  Function LowerCase(c:UnicodeChar):UnicodeChar;
  { Passes c as a one-character string to the widestring manager's
    LowerUnicodeStringProc and returns the first character of its result. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(UnicodeString(c))[1];
  end;
  1195. {$endif FPC_HAS_LOWERCASE_UNICODECHAR}
  1196. {$ifndef FPC_HAS_LOWERCASE_UNICODESTR}
  1197. {$define FPC_HAS_LOWERCASE_UNICODESTR}
  function LowerCase(const s : UnicodeString) : UnicodeString;
  { Returns s as processed by the widestring manager's
    LowerUnicodeStringProc. }
  begin
    LowerCase:=widestringmanager.LowerUnicodeStringProc(s);
  end;
  1202. {$endif FPC_HAS_LOWERCASE_UNICODESTR}
  1203. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1204. {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  { Sets S to length Len and, when Buf is not nil and Len>0, fills it with
    the first Len characters found at Buf. }
  begin
    SetLength(S,Len);
    if (Buf=Nil) or (Len<=0) then
      exit;
    Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  end;
  1211. {$endif FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
  1212. {$ifndef FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1213. {$define FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PAnsiChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  { Sets S to the conversion (from DefaultSystemCodePage) of the first Len
    bytes at Buf. When Buf is nil or Len<=0 no conversion takes place and S
    is merely set to length Len. }
  begin
    if (Buf=Nil) or (Len<=0) then
      SetLength(S,Len)
    else
      widestringmanager.Ansi2UnicodeMoveProc(Buf,DefaultSystemCodePage,S,Len);
  end;
  1221. {$endif FPC_HAS_SETSTRING_UNICODESTR_PCHAR}
  1222. {$ifndef FPUNONE}
  Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  { Val for real numbers on a UnicodeString: S is converted to a
    ShortString and parsed with Val. Input longer than 255 characters is
    rejected with Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_Real_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_Real_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  1236. {$endif}
  1237. {$ifndef FPC_STR_ENUM_INTERN}
  function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  { Val for enumerations on a UnicodeString: s is converted to a
    ShortString and parsed with Val. Input longer than 255 characters is
    rejected with code=256; the result is 0 in that case, like in the other
    fpc_Val_*_UnicodeStr helpers, instead of being left undefined.
    NOTE(review): str2ordindex is not used by this routine - confirm that
    this is intended. }
  var
    ss: ShortString;
  begin
    fpc_val_enum_unicodestr:=0;
    if length(s)>255 then
      code:=256
    else
      begin
        ss:=ShortString(s);
        val(ss,fpc_val_enum_unicodestr,code);
      end;
  end;
  1250. {$endif FPC_STR_ENUM_INTERN}
  Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  { Val for Currency on a UnicodeString: S is converted to a ShortString
    and parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_Currency_UnicodeStr,code);
      end
    else
      begin
        fpc_Val_Currency_UnicodeStr:=0;
        code:=256;
      end;
  end;
  Function fpc_Val_UInt_UnicodeStr ({$ifndef VER3_2}DestSize: SizeInt;{$endif VER3_2} Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  { Val for unsigned integers on a UnicodeString: S is converted to a
    ShortString and parsed with Val. Input longer than 255 characters is
    rejected with Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_UInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_UInt_UnicodeStr,code);
      end
    else
      code:=256;
  end;
  Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  { Val for signed integers on a UnicodeString: S is converted to a
    ShortString and handed to int_Val_SInt_ShortStr together with DestSize.
    Input longer than 255 characters is rejected with Code=256 and a result
    of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_SInt_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        fpc_Val_SInt_UnicodeStr:=int_Val_SInt_ShortStr(DestSize,buf,Code);
      end
    else
      code:=256;
  end;
  1292. {$ifndef CPU64}
  Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  { Val for QWord on a UnicodeString: S is converted to a ShortString and
    parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_qword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_qword_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  { Val for Int64 on a UnicodeString: S is converted to a ShortString and
    parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_int64_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_int64_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  1319. {$endif CPU64}
  1320. {$if defined(CPU16) or defined(CPU8)}
  Function fpc_Val_longword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): longword; [public, alias:'FPC_VAL_LONGWORD_UNICODESTR']; compilerproc;
  { Val for LongWord on a UnicodeString: S is converted to a ShortString
    and parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_longword_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_longword_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  Function fpc_Val_longint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): LongInt; [public, alias:'FPC_VAL_LONGINT_UNICODESTR']; compilerproc;
  { Val for LongInt on a UnicodeString: S is converted to a ShortString
    and parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_longint_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_longint_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  Function fpc_Val_word_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): word; [public, alias:'FPC_VAL_WORD_UNICODESTR']; compilerproc;
  { Val for Word on a UnicodeString: S is converted to a ShortString and
    parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_word_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_word_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  Function fpc_Val_smallint_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): SmallInt; [public, alias:'FPC_VAL_SMALLINT_UNICODESTR']; compilerproc;
  { Val for SmallInt on a UnicodeString: S is converted to a ShortString
    and parsed with Val. Input longer than 255 characters is rejected with
    Code=256 and a result of 0. }
  Var
    buf: ShortString;
  begin
    fpc_Val_smallint_UnicodeStr:=0;
    if length(S)<=255 then
      begin
        buf:=ShortString(S);
        Val(buf,fpc_Val_smallint_UnicodeStr,Code);
      end
    else
      code:=256;
  end;
  1373. {$endif CPU16 or CPU8}
  1374. {$ifndef FPUNONE}
  procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  { Str for real numbers into a UnicodeString: formats d with str_real
    (passing len, fr and rt cast to treal_type) into a ShortString and
    converts that to s. }
  var
    buf: shortstring;
  begin
    str_real(len,fr,d,treal_type(rt),buf);
    s:=UnicodeString(buf);
  end;
  1382. {$endif}
  1383. {$ifndef FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  { Str for enumerations into a UnicodeString: lets fpc_shortstr_enum
    produce the ShortString form and converts that to s. }
  var
    buf: ShortString;
  begin
    fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,buf);
    s:=UnicodeString(buf);
  end;
  1391. {$endif FPC_STR_ENUM_INTERN}
  procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  { Str for booleans into a UnicodeString: lets fpc_shortstr_bool produce
    the ShortString form and converts that to s. }
  var
    buf: ShortString;
  begin
    fpc_shortstr_bool(b,len,buf);
    s:=UnicodeString(buf);
  end;
  procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  { Str for Currency into a UnicodeString: formats c with width len and fr
    decimals into a ShortString and converts that to s. }
  var
    buf: shortstring;
  begin
    str(c:len:fr,buf);
    s:=UnicodeString(buf);
  end;
  Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for signed integers into a UnicodeString: formats v with width Len
    into a ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for unsigned integers into a UnicodeString: formats v with width
    Len into a ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  1420. {$ifndef CPU64}
  Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Int64 into a UnicodeString: formats v with width Len into a
    ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for QWord into a UnicodeString: formats v with width Len into a
    ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  1435. {$endif CPU64}
  1436. {$if defined(CPU16) or defined(CPU8)}
  Procedure fpc_UnicodeStr_LongInt(v : LongInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongInt into a UnicodeString: formats v with width Len into a
    ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  Procedure fpc_UnicodeStr_LongWord(v : LongWord;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for LongWord into a UnicodeString: formats v with width Len into a
    ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  Procedure fpc_UnicodeStr_SmallInt(v : SmallInt; Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for SmallInt into a UnicodeString: formats v with width Len into a
    ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  Procedure fpc_UnicodeStr_Word(v : Word;Len : SizeInt; out S : UnicodeString);compilerproc;
  { Str for Word into a UnicodeString: formats v with width Len into a
    ShortString and converts that to S. }
  Var
    buf: ShortString;
  begin
    Str(v:Len,buf);
    S:=UnicodeString(buf);
  end;
  1465. {$endif CPU16 or CPU8}
  function UnicodeToUtf8(Dest: PAnsiChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Convenience overload: converts all of Source (its length is taken with
    Length) through the four-argument UnicodeToUtf8. Returns 0 when Source
    is nil. }
  begin
    Result:=0;
    if assigned(Source) then
      Result:=UnicodeToUtf8(Dest,MaxBytes,Source,Length(Source));
  end;
  function UnicodeToUtf8(Dest: PAnsiChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  {
    Encodes SourceChars UTF-16 code units from Source as UTF-8.

    Dest assigned: at most MaxDestBytes bytes are written, the last of which
    is always a #0 terminator. Conversion stops as soon as the next
    character would not fit completely in front of the terminator, so the
    output never ends in a partial multi-byte sequence. The result is the
    number of bytes written including the terminator. When MaxDestBytes is
    0 nothing is written and 0 is returned.

    Dest nil: nothing is written; the result is the number of bytes needed
    for the whole input, plus one for the terminator.

    Source nil: returns 0.

    High surrogates without a following low surrogate, and low surrogates
    on their own, produce no output.
  }
  {$ifdef EXCLUDE_COMPLEX_PROCS}
  begin
    runerror(217);
  end;
  {$else EXCLUDE_COMPLEX_PROCS}
  var
    i,j,limit : SizeUInt;
    lw : longword;
  begin
    result:=0;
    if source=nil then
      exit;
    i:=0;
    j:=0;
    if assigned(Dest) then
      begin
        { a zero-sized buffer cannot even take the terminator }
        if MaxDestBytes=0 then
          exit;
        { the last byte of the buffer is reserved for the terminator }
        limit:=MaxDestBytes-1;
        while (i<SourceChars) and (j<limit) do
          begin
            lw:=ord(Source[i]);
            case lw of
              0..$7f:
                begin
                  Dest[j]:=AnsiChar(lw);
                  inc(j);
                end;
              $80..$7ff:
                begin
                  if j+1>=limit then
                    break;
                  Dest[j]:=AnsiChar($c0 or (lw shr 6));
                  Dest[j+1]:=AnsiChar($80 or (lw and $3f));
                  inc(j,2);
                end;
              $800..$d7ff,$e000..$ffff:
                begin
                  if j+2>=limit then
                    break;
                  Dest[j]:=AnsiChar($e0 or (lw shr 12));
                  Dest[j+1]:=AnsiChar($80 or ((lw shr 6) and $3f));
                  Dest[j+2]:=AnsiChar($80 or (lw and $3f));
                  inc(j,3);
                end;
              $d800..$dbff:
                {High Surrogates}
                begin
                  if j+3>=limit then
                    break;
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      { $d7c0 is ($d800 - ($10000 shr 10)) }
                      lw:=(longword(lw-$d7c0) shl 10) + (ord(source[i+1]) xor $dc00);
                      Dest[j]:=AnsiChar($f0 or (lw shr 18));
                      Dest[j+1]:=AnsiChar($80 or ((lw shr 12) and $3f));
                      Dest[j+2]:=AnsiChar($80 or ((lw shr 6) and $3f));
                      Dest[j+3]:=AnsiChar($80 or (lw and $3f));
                      inc(j,4);
                      { the low surrogate has been consumed as well }
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
        { j<=limit holds here, so the terminator stays inside the buffer }
        Dest[j]:=#0;
      end
    else
      begin
        { counting pass only }
        while i<SourceChars do
          begin
            case word(Source[i]) of
              $0..$7f:
                inc(j);
              $80..$7ff:
                inc(j,2);
              $800..$d7ff,$e000..$ffff:
                inc(j,3);
              $d800..$dbff:
                begin
                  if (i+1<sourcechars) and
                     (word(Source[i+1]) >= $dc00) and
                     (word(Source[i+1]) <= $dfff) then
                    begin
                      inc(j,4);
                      inc(i);
                    end;
                end;
            end;
            inc(i);
          end;
      end;
    result:=j+1;
  end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { Convenience overload for zero-terminated UTF-8 input.
    The number of source bytes is taken from length(Source) and the work is
    delegated to the five-argument Utf8ToUnicode with IgnoreInvalid=True.
    A nil Source yields 0. }
  function Utf8ToUnicode(Dest: PUnicodeChar; Source: PAnsiChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if Source=nil then
      Result:=0
    else
      Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True);
  end;
  { Four-argument form: identical to the five-argument Utf8ToUnicode called
    with IgnoreInvalid=True. }
  function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=Utf8ToUnicode(Dest,
                          MaxDestChars,
                          Source,
                          SourceBytes,
                          True { IgnoreInvalid });
  end;
  { Decodes SourceBytes bytes of UTF-8 starting at Source into UTF-16.

    Source=nil : returns 0.
    Dest<>nil  : stores at most MaxDestChars code units in Dest and returns the
                 number of code units stored plus one. No terminator is
                 written to Dest by this routine.
    Dest=nil   : stores nothing and returns the number of code units the
                 whole input produces plus one.

    Code points above $FFFF are emitted as a surrogate pair; when Dest has no
    room left for both halves the sequence is consumed without output.
    A sequence that is malformed, overlong, encodes a surrogate or $FFFE/$FFFF,
    or has a 5..7 byte lead is replaced by UNICODE_INVALID ('?'); if
    IgnoreInvalid is False, HandleError(231) is called for it first.
    When a trail byte is missing, the bytes seen so far are decoded as a
    shorter sequence. Bytes at index SourceBytes and beyond are never read. }
  function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
  {$ifdef EXCLUDE_COMPLEX_PROCS}
  begin
    runerror(217);
  end;
  {$else EXCLUDE_COMPLEX_PROCS}
  const
    UNICODE_INVALID=63;
  var
    InputUTF8: SizeUInt;
    IBYTE: BYTE;
    OutputUnicode: SizeUInt;
    TempBYTE: BYTE;
    CharLen: SizeUint;
    LookAhead: SizeUInt;
    UC: SizeUInt;
  begin
    if not assigned(Source) then
      begin
        result:=0;
        exit;
      end;
    result:=SizeUInt(-1);
    InputUTF8:=0;
    OutputUnicode:=0;
    if Assigned(Dest) Then
      begin
        while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80) = 0 then
              begin
                // One character US-ASCII, convert it to unicode
                Dest[OutputUnicode]:=WideChar(IBYTE);
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                // Count the leading 1 bits of the lead byte: that is the
                // announced length of the sequence.
                TempByte:=IBYTE;
                CharLen:=0;
                while (TempBYTE and $80)<>0 do
                  begin
                    TempBYTE:=(TempBYTE shl 1) and $FE;
                    inc(CharLen);
                  end;
                //Test for the "CharLen" conforms UTF-8 string
                //This means the 10xxxxxx pattern.
                if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
                  begin
                    //Insufficient chars in string to decode
                    //UTF-8 array. Fallback to single AnsiChar.
                    CharLen:= 1;
                  end
                else if SizeUInt(InputUTF8+CharLen)>SourceBytes then
                  begin
                    //Exactly one trail byte is missing. Do not look at
                    //Source[SourceBytes], which is outside of the input;
                    //handle it like a non-continuation byte found there.
                    dec(CharLen);
                  end;
                for LookAhead := 1 to CharLen-1 do
                  begin
                    if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
                       ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
                      begin
                        //Invalid UTF-8 sequence, fallback.
                        CharLen:= LookAhead;
                        break;
                      end;
                  end;
                UC:=$FFFF;
                case CharLen of
                  1:  begin
                        //Not valid UTF-8 sequence
                        UC:=UNICODE_INVALID;
                      end;
                  2:  begin
                        //Two bytes UTF, convert it
                        UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                        UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                        if UC <= $7F then
                          begin
                            //Invalid UTF sequence.
                            UC:=UNICODE_INVALID;
                          end;
                      end;
                  3:  begin
                        //Three bytes, convert it to unicode
                        UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                        if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                          begin
                            //Invalid UTF-8 sequence
                            UC:= UNICODE_INVALID;
                          End;
                      end;
                  4:  begin
                        //Four bytes, convert it to two unicode characters
                        UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                        if (UC < $10000) or (UC > $10FFFF) then
                          begin
                            UC:= UNICODE_INVALID;
                          end
                        else
                          begin
                            { only store pair if room }
                            dec(UC,$10000);
                            if (OutputUnicode<MaxDestChars-1) then
                              begin
                                Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
                                inc(OutputUnicode);
                                UC:=(UC and $3ff) + $DC00;
                              end
                            else
                              begin
                                InputUTF8:= InputUTF8 + CharLen;
                                { don't store anything }
                                CharLen:=0;
                              end;
                          end;
                      end;
                  5,6,7:  begin
                            //Invalid UTF8 to unicode conversion,
                            //mask it as invalid UNICODE too.
                            UC:=UNICODE_INVALID;
                          end;
                end;
                if CharLen > 0 then
                  begin
                    if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
                      HandleError(231); // Will be converted to EConversionError in sysutils
                    Dest[OutputUnicode]:=WideChar(UC);
                    inc(OutputUnicode);
                  end;
                InputUTF8:= InputUTF8 + CharLen;
              end;
          end;
        Result:=OutputUnicode+1;
      end
    else
      begin
        { size calculation only: same decoding, nothing is stored }
        while (InputUTF8<SourceBytes) do
          begin
            IBYTE:=byte(Source[InputUTF8]);
            if (IBYTE and $80) = 0 then
              begin
                // One character US-ASCII, convert it to unicode
                inc(OutputUnicode);
                inc(InputUTF8);
              end
            else
              begin
                TempByte:=IBYTE;
                CharLen:=0;
                while (TempBYTE and $80)<>0 do
                  begin
                    TempBYTE:=(TempBYTE shl 1) and $FE;
                    inc(CharLen);
                  end;
                //Test for the "CharLen" conforms UTF-8 string
                //This means the 10xxxxxx pattern.
                if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
                  begin
                    //Insufficient chars in string to decode
                    //UTF-8 array. Fallback to single AnsiChar.
                    CharLen:= 1;
                  end
                else if SizeUInt(InputUTF8+CharLen)>SourceBytes then
                  begin
                    //Exactly one trail byte is missing. Do not look at
                    //Source[SourceBytes], which is outside of the input;
                    //handle it like a non-continuation byte found there.
                    dec(CharLen);
                  end;
                for LookAhead := 1 to CharLen-1 do
                  begin
                    if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
                       ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
                      begin
                        //Invalid UTF-8 sequence, fallback.
                        CharLen:= LookAhead;
                        break;
                      end;
                  end;
                UC:=$FFFF;
                case CharLen of
                  1:  begin
                        //Not valid UTF-8 sequence
                        UC:=UNICODE_INVALID;
                      end;
                  2:  begin
                        //Two bytes UTF, convert it
                        UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
                        UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
                        if UC <= $7F then
                          begin
                            //Invalid UTF sequence.
                            UC:=UNICODE_INVALID;
                          end;
                      end;
                  3:  begin
                        //Three bytes, convert it to unicode
                        UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
                        If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
                          begin
                            //Invalid UTF-8 sequence
                            UC:= UNICODE_INVALID;
                          end;
                      end;
                  4:  begin
                        //Four bytes, convert it to two unicode characters
                        UC:= (byte(Source[InputUTF8]) and $07) shl 18;
                        UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
                        UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
                        UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
                        if (UC < $10000) or (UC > $10FFFF) then
                          UC:= UNICODE_INVALID
                        else
                          { extra code unit for the surrogate pair }
                          inc(OutputUnicode);
                      end;
                  5,6,7:  begin
                            //Invalid UTF8 to unicode conversion,
                            //mask it as invalid UNICODE too.
                            UC:=UNICODE_INVALID;
                          end;
                end;
                if CharLen > 0 then
                  begin
                    if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
                      HandleError(231); // Will be converted to EConversionError in sysutils
                    inc(OutputUnicode);
                  end;
                InputUTF8:= InputUTF8 + CharLen;
              end;
          end;
        Result:=OutputUnicode+1;
      end;
  end;
  {$endif EXCLUDE_COMPLEX_PROCS}
  { RawByteString overload: the input is first converted to a UnicodeString
    and then encoded by the UnicodeString overload of UTF8Encode. }
  function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  begin
    Result:=UTF8Encode(
      UnicodeString(s)
    );
  end;
  1826. {$ifndef FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1827. {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
  { Encodes the UTF-16 string s as UTF-8.
    A work buffer of 3 bytes per source code unit is allocated, filled by
    UnicodeToUtf8 and then trimmed to the number of bytes reported (minus the
    terminator). An empty input, or a conversion that reports 0, yields ''. }
  function UTF8Encode(const s : UnicodeString) : RawByteString;
  var
    SrcLen, Written : SizeInt;
    Buf : UTF8String;
  begin
    Result:='';
    SrcLen:=Length(s);
    if SrcLen<>0 then
      begin
        SetLength(Buf,SrcLen*3);
        { +1: the string owns room for the terminating #0 as well }
        Written:=UnicodeToUtf8(PAnsiChar(Buf),Length(Buf)+1,PUnicodeChar(s),SrcLen);
        if Written>0 then
          begin
            SetLength(Buf,Written-1);
            Result:=Buf;
          end;
      end;
  end;
  1844. {$endif FPC_HAS_UTF8ENCODE_UNICODESTRING}
  1845. {$ifndef FPC_HAS_UTF8DECODE_UNICODESTRING}
  1846. {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Decodes the UTF-8 bytes in s to a UTF-16 string.
    A work buffer of one code unit per source byte is allocated, filled by
    Utf8ToUnicode and then trimmed to the reported length minus one.
    An empty input, or a conversion that reports 0, yields ''. }
  function UTF8Decode(const s : RawByteString): UnicodeString;
  var
    SrcLen, Produced : SizeInt;
    Buf : UnicodeString;
  begin
    Result:='';
    SrcLen:=Length(s);
    if SrcLen<>0 then
      begin
        SetLength(Buf,SrcLen);
        Produced:=Utf8ToUnicode(PUnicodeChar(Buf),Length(Buf)+1,PAnsiChar(s),SrcLen);
        if Produced>0 then
          begin
            SetLength(Buf,Produced-1);
            Result:=Buf;
          end;
      end;
  end;
  1863. {$endif FPC_HAS_UTF8DECODE_UNICODESTRING}
  { Returns s re-encoded as UTF-8; simply forwards to Utf8Encode. }
  function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=Utf8Encode(
      s
    );
  end;
  { Decodes the UTF-8 data in s with Utf8Decode and converts the resulting
    UnicodeString back to a RawByteString. }
  function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    Result:=RawByteString(
      Utf8Decode(s)
    );
  end;
  1872. {$ifdef FPC_HAS_FEATURE_DYNARRAYS}
  { Converts len UTF-16 code units at p into the UCS4 array res.
    res receives one element per code point plus a terminating 0 element.
    A high surrogate directly followed by a low surrogate is combined into
    one code point; any other surrogate code unit is copied unchanged. }
  procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String);
  var
    src, dst: sizeint;
    cu: longint;
  begin
    { pass 1: count the code points }
    dst:=0;
    src:=0;
    while src<len do
      begin
        if (p[src]>=#$d800) and (p[src]<=#$dbff) and
           (src+1<len) and
           (p[src+1]>=#$dc00) and (p[src+1]<=#$dfff) then
          inc(src,2)    { valid surrogate pair }
        else
          inc(src);     { BMP character or unpaired surrogate }
        inc(dst);
      end;
    SetLength(res,dst+1); { +1 for null termination }

    { pass 2: convert }
    dst:=0;
    src:=0;
    while src<len do
      begin
        cu:=ord(p[src]);
        if (cu>=$d800) and (cu<=$dbff) and
           (src+1<len) and
           (p[src+1]>=#$dc00) and (p[src+1]<=#$dfff) then
          begin
            { $d7c0 = $d800 - ($10000 shr 10) }
            res[dst]:=(UCS4Char(cu-$d7c0) shl 10)+(UCS4Char(p[src+1]) xor $dc00);
            inc(src,2);
          end
        else
          begin
            { BMP character, or invalid surrogate stored as is }
            res[dst]:=cu;
            inc(src);
          end;
        inc(dst);
      end;
    res[dst]:=0;
  end;
  1918. {$ifndef FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1919. {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
  { Converts a UnicodeString to a zero-terminated UCS4String by handing its
    characters and length to UCS4Encode. }
  function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  begin
    UCS4Encode(PWideChar(s),
               Length(s),
               result);
  end;
  1924. {$endif FPC_HAS_UCS4STRING_TO_UNICODESTR}
  1925. {$ifndef FPC_HAS_WIDESTR_TO_UCS4STRING}
  1926. {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
  { Converts a WideString to a zero-terminated UCS4String by handing its
    characters and length to UCS4Encode. }
  function WideStringToUCS4String(const s : WideString) : UCS4String;
  begin
    UCS4Encode(PWideChar(s),
               Length(s),
               result);
  end;
  1931. {$endif FPC_HAS_WIDESTR_TO_UCS4STRING}
  1932. {$ifndef FPC_HAS_UCS4STRING_TO_WIDESTR}
  1933. {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
  { Writes the UTF-16 form of s to dest.
    dest must point to a previously allocated wide/unicodestring that is
    large enough; no bounds are checked here. The last element of s is the
    explicit terminating #0 and is not converted. Code points above $FFFF and
    up to $10FFFF become a surrogate pair, larger values become '?'. }
  procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
  var
    idx: sizeint;
    cp: UCS4Char;
  begin
    for idx:=0 to length(s)-2 do { -2: skip the terminating #0 element }
      begin
        cp:=s[idx];
        if cp>$ffff then
          begin
            if dword(cp)<=$10ffff then
              begin
                { $d7c0 = $d800 - ($10000 shr 10) }
                dest[0]:=widechar(cp shr 10 + $d7c0);
                { subtracting $10000 doesn't change low 10 bits }
                dest[1]:=widechar(cp and $3ff + $dc00);
                inc(dest,2);
              end
            else
              begin
                { invalid code point }
                dest^:='?';
                inc(dest);
              end;
          end
        else
          begin
            dest^:=widechar(cp);
            inc(dest);
          end;
      end;
  end;
  { Converts a zero-terminated UCS4String to a UnicodeString.
    The result is sized first (2 code units for every value in
    $10000..$10FFFF, 1 for anything else) and then filled by UCS4Decode. }
  function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  var
    idx : SizeInt;
    units : SizeInt;
  begin
    units:=0;
    for idx:=0 to length(s)-2 do { skip terminating #0 }
      if (s[idx]>$ffff) and (cardinal(s[idx])<=$10ffff) then
        Inc(units,2)
      else
        Inc(units);
    SetLength(result,units);
    UCS4Decode(s,pointer(result));
  end;
  { Converts a zero-terminated UCS4String to a WideString.
    The result is sized first (2 code units for every value in
    $10000..$10FFFF, 1 for anything else) and then filled by UCS4Decode. }
  function UCS4StringToWideString(const s : UCS4String) : WideString;
  var
    idx : SizeInt;
    units : SizeInt;
  begin
    units:=0;
    for idx:=0 to length(s)-2 do { skip terminating #0 }
      if (s[idx]>$ffff) and (cardinal(s[idx])<=$10ffff) then
        Inc(units,2)
      else
        Inc(units);
    SetLength(result,units);
    UCS4Decode(s,pointer(result));
  end;
  1979. {$endif FPC_HAS_UCS4STRING_TO_WIDESTR}
  1980. {$endif FPC_HAS_FEATURE_DYNARRAYS}
  1981. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  { Messages printed by unimplementedunicodestring }
  const
    SNoUnicodestrings = 'This binary has no string conversion support compiled in.';
    SRecompileWithUnicodestrings = 'Recompile the application with a unit that installs a unicodestring manager in the program uses clause.';

  { Called by the stub string manager entries below.
    When console I/O is available and no widestring manager is built in, the
    two messages above are written to StdErr for console programs. In every
    configuration runtime error 234 is then raised through
    HandleErrorAddrFrameInd with the current code address and frame. }
  procedure unimplementedunicodestring;
  begin
  {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  {$ifndef HAS_WIDESTRINGMANAGER}
    if IsConsole then
      begin
        Writeln(StdErr,SNoUnicodestrings);
        Writeln(StdErr,SRecompileWithUnicodestrings);
      end;
  {$endif HAS_WIDESTRINGMANAGER}
  {$endif FPC_HAS_FEATURE_CONSOLEIO}
    HandleErrorAddrFrameInd(234{RuntimeErrorExitCodes[reCodesetConversion]},
                            get_pc_addr,
                            get_frame);
  end;
  { Returns the ElementSize field stored in the header of S, or
    SizeOf(UnicodeChar) when S is empty (nil) and has no header. }
  function StringElementSize(const S: UnicodeString): Word; overload;
  begin
    if Pointer(S)=nil then
      Result:=SizeOf(UnicodeChar)
    else
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize;
  end;
  { Returns the Ref field stored in the header of S, or 0 when S is empty
    (nil) and has no header. }
  function StringRefCount(const S: UnicodeString): SizeInt; overload;
  begin
    if Pointer(S)=nil then
      Result:=0
    else
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref;
  end;
  { Returns the code page of S.
    With FPC_HAS_CPSTRING the CodePage field of the string header is used for
    a non-empty string; in all other cases DefaultUnicodeCodePage is
    returned. }
  function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
  begin
    Result:=DefaultUnicodeCodePage;
  {$ifdef FPC_HAS_CPSTRING}
    if Pointer(S)<>nil then
      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage;
  {$endif FPC_HAS_CPSTRING}
  end;
  2021. {$push}
  2022. {$warnings off}
  { Placeholder for the upper/lower case UnicodeString handlers: it only calls
    unimplementedunicodestring and never assigns a result. }
  function StubUnicodeCase(const s : UnicodeString) : UnicodeString;
  begin
    unimplementedunicodestring;
  end;
  { Placeholder for the UnicodeString compare handler: it only calls
    unimplementedunicodestring and never assigns a result. }
  function StubCompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  begin
    unimplementedunicodestring;
  end;
  { Placeholder for the upper/lower case WideString handlers: it only calls
    unimplementedunicodestring and never assigns a result. }
  function StubWideCase(const s: WideString): WideString;
  begin
    unimplementedunicodestring;
  end;
  { Placeholder for the WideString compare handler: it only calls
    unimplementedunicodestring and never assigns a result. }
  function StubCompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  begin
    unimplementedunicodestring;
  end;
  2039. {$pop}
  { Fills the global widestringmanager with the built-in defaults.
    Without HAS_WIDESTRINGMANAGER the record is reset first and the default
    conversion routines plus the case-conversion stubs are installed.
    The compare stubs and the default length helpers are installed in every
    configuration. }
  procedure initunicodestringmanager;
  begin
  {$ifndef HAS_WIDESTRINGMANAGER}
    widestringmanager:=Default(TUnicodeStringManager);

    { conversions }
  {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
    widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
  {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
    widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
  {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
    widestringmanager.Wide2AnsiMoveProc:=@DefaultUnicode2AnsiMove;
    widestringmanager.Ansi2UnicodeMoveProc:=@DefaultAnsi2UnicodeMove;
    widestringmanager.Unicode2AnsiMoveProc:=@DefaultUnicode2AnsiMove;

    { case conversion is not available without a real manager }
    widestringmanager.UpperWideStringProc:=@StubWideCase;
    widestringmanager.LowerWideStringProc:=@StubWideCase;
    widestringmanager.UpperUnicodeStringProc:=@StubUnicodeCase;
    widestringmanager.LowerUnicodeStringProc:=@StubUnicodeCase;

    widestringmanager.GetStandardCodePageProc:=@DefaultGetStandardCodePage;
  {$endif HAS_WIDESTRINGMANAGER}

    { comparison stubs }
    widestringmanager.CompareWideStringProc:=@StubCompareWideString;
    // widestringmanager.CompareTextWideStringProc:=@StubCompareWideString;
    widestringmanager.CompareUnicodeStringProc:=@StubCompareUnicodeString;

    { length helpers }
    widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
    widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
  end;
  2064. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  2065. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2066. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  { Converts Str to DefaultFileSystemCodePage using the installed
    Unicode2AnsiMoveProc of the widestring manager. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
  Begin
    widestringmanager.Unicode2AnsiMoveProc(
      punicodechar(Str),
      Result,
      DefaultFileSystemCodePage,
      Length(Str));
  End;
  2072. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
  2073. {$ifndef FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  2074. {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Converts the widechar array arr to DefaultFileSystemCodePage using the
    installed Unicode2AnsiMoveProc of the widestring manager.
    The text ends at the first #0 element; if the array contains no #0 the
    whole array is converted. The search never looks beyond high(arr), so an
    array without terminator (or an empty array) is handled safely. }
  Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
  var
    len: SizeInt;
  Begin
    { bounded search for the terminator }
    len:=IndexWord(arr,high(arr)+1,0);
    if len=-1 then
      len:=high(arr)+1;
    widestringmanager.Unicode2AnsiMoveProc(punicodechar(@arr),Result,
      DefaultFileSystemCodePage,len);
  End;
  2080. {$endif FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
  { Returns a copy of Str whose code page is set to DefaultFileSystemCodePage
    through SetCodePage with its third argument True. }
  Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
  Begin
    Result:=Str;
    SetCodePage(Result,
                DefaultFileSystemCodePage,
                True);
  End;
  { Delphi compatibility: the bytes of S are always interpreted as UTF-8,
    whatever code page the string carries. Forwards to UTF8Decode. }
  function UTF8ToString(const S: RawByteString): UnicodeString; inline;
  begin
    Result:=UTF8Decode(
      S
    );
  end;
  { ShortString overload: S is first assigned to a RawByteString and that
    copy is decoded with UTF8Decode. }
  function UTF8ToString(const S: ShortString): UnicodeString;
  var
    Bytes: RawByteString;
  begin
    Bytes:=S;
    Result:=UTF8Decode(Bytes);
  end;
  { PAnsiChar overload: copies the zero-terminated data at S into a
    RawByteString and decodes that copy with the RawByteString overload.
    Count is a SizeInt, the type Length returns, so the length of a long
    input is not truncated by a narrower integer type. }
  function UTF8ToString(const S: PAnsiChar): UnicodeString;
  var
    rs: RawByteString;
    Count: SizeInt;
  begin
    Count := length(S);
    SetLength(rs, Count);
    if Count > 0 then
      fpc_pchar_ansistr_intern_charmove(S,0,rs,0,Count);
    Result := UTF8ToString(rs);
  end;
  2110. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  2111. are as well }
  2112. {$ifndef CPUJVM}
  { array of AnsiChar overload: copies all Length(S) elements of S into a
    RawByteString and decodes that copy with the RawByteString overload.
    Count is a SizeInt, the type Length returns, so the length of a large
    array is not truncated by a narrower integer type. }
  function UTF8ToString(const S: array of AnsiChar): UnicodeString;
  var
    rs: RawByteString;
    Count: SizeInt;
  begin
    Count := Length(S);
    SetLength(rs, Count);
    if Count > 0 then
      fpc_pchar_ansistr_intern_charmove(@S,Low(S),rs,0,Count);
    Result := UTF8ToString(rs);
  end;
  { array of Byte overload: copies all Length(S) bytes of S into a
    RawByteString and decodes that copy with the RawByteString overload.
    Count is a SizeInt, the type Length returns, so the length of a large
    array is not truncated by a narrower integer type. }
  function UTF8ToString(const S: array of Byte): UnicodeString;
  var
    rs: RawByteString;
    Count: SizeInt;
  begin
    Count := Length(S);
    SetLength(rs, Count);
    if Count > 0 then
      fpc_pchar_ansistr_intern_charmove(pansichar(@S),Low(S),rs,0,Count);
    Result := UTF8ToString(rs);
  end;
  2135. {$endif not CPUJVM}