ustrings.inc 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UnicodeStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$i wustrings.inc}
  13. {
  14. This file contains the implementation of the UnicodeString type,
  15. and all things that are needed for it.
  16. UnicodeString is defined as a 'silent' punicodechar :
  17. a punicodechar that points to :
  18. @-8 : SizeInt for reference count;
  19. @-4 : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply
  20. with sizeof(UnicodeChar) to convert. This is needed to be compatible with Delphi and
  21. Windows COM BSTR.
  22. @ : String + Terminating #0;
  23. Punicodechar(Unicodestring) is a valid typecast.
  24. So WS[i] is converted to the address @WS+i-1.
  25. Constants should be assigned a reference count of -1
  26. Meaning that they can't be disposed of.
  27. }
  { Header record stored in front of the character data of a UnicodeString.
    A UnicodeString pointer addresses the First field, not the record start. }
  type
    PUnicodeRec = ^TUnicodeRec;
    TUnicodeRec = packed record
      Ref   : SizeInt;      { reference count; negative values mark constants }
      Len   : SizeInt;      { length field }
      First : UnicodeChar;  { first character of the string data }
    end;

  const
    { Size of the header including the slot of the first character }
    UnicodeRecLen   = SizeOf(TUnicodeRec);
    { Distance between the start of the record and the string data }
    UnicodeFirstOff = UnicodeRecLen - SizeOf(UnicodeChar);
  38. {
  39. Default UnicodeChar <-> Char conversion only maps the values
  40. below 256 one-to-one, all other UnicodeChars are translated to '?'.
  41. These routines can be overwritten for the Current Locale
  42. }
  procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:ansistring;len:SizeInt);
  {
    Fallback UnicodeChar -> Char conversion.
    dest is resized to len; every source value below 256 is stored as the
    char with the same ordinal value, any other value is stored as '?'.
  }
  var
    idx  : SizeInt;
    code : word;
  begin
    SetLength(dest,len);
    idx:=1;
    while idx<=len do
      begin
        code:=word(source^);
        if code>=256 then
          dest[idx]:='?'
        else
          dest[idx]:=char(code);
        inc(source);
        inc(idx);
      end;
  end;
  procedure DefaultAnsi2UnicodeMove(source:pchar;var dest:unicodestring;len:SizeInt);
  {
    Fallback Char -> UnicodeChar conversion.
    dest is resized to len and each source byte is widened to the
    UnicodeChar with the same ordinal value.
  }
  var
    idx : SizeInt;
  begin
    SetLength(dest,len);
    idx:=1;
    while idx<=len do
      begin
        dest[idx]:=unicodechar(byte(source^));
        inc(source);
        inc(idx);
      end;
  end;
  Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  { Copies the currently installed manager record (widestringmanager) into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { Installs New as widestringmanager; the record that was active before
    the call is handed back in Old. }
  begin
    Old:=widestringmanager;
    widestringmanager:=New;
  end;
  Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  { Installs New as widestringmanager without returning the previous record. }
  begin
    widestringmanager:=New;
  end;
  Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  { Copies the currently installed manager record (widestringmanager) into Manager. }
  begin
    Manager:=widestringmanager;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { Installs New as widestringmanager; the record that was active before
    the call is handed back in Old. }
  begin
    Old:=widestringmanager;
    widestringmanager:=New;
  end;
  Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  { Installs New as widestringmanager without returning the previous record. }
  begin
    widestringmanager:=New;
  end;
  94. {****************************************************************************
  95. Internal functions, not in interface.
  96. ****************************************************************************}
  procedure UnicodeStringError;
  { Reports error code 204 through HandleErrorFrame, passing the
    current frame as the error location. }
  begin
    HandleErrorFrame(204,get_frame);
  end;
  {$ifdef UnicodeStrDebug}
  Procedure DumpUnicodeRec(S : Pointer);
  { Debug helper: prints the Len and Ref header fields of the string
    whose character data S points to, or a notice when S is nil. }
  var
    header : PUnicodeRec;
  begin
    if S=Nil then
      begin
        Writeln ('String is nil');
        exit;
      end;
    header:=PUnicodeRec(S-UnicodeFirstOff);
    Write ('(Len:',header^.len);
    Writeln (' Ref: ',header^.ref,')');
  end;
  {$endif}
  Function NewUnicodeString(Len : SizeInt) : Pointer;
  {
    Allocates a new UnicodeString with room for Len characters on the heap.
    The header receives Len*2 in its length field and a reference count
    of 1; a #0 is stored in the first character slot.
    Returns a pointer to the character data (not to the header).
    When GetMem hands back nil, UnicodeStringError is called and nil is
    returned if that call comes back.
  }
  Var
    block : Pointer;
  begin
    GetMem(block,UnicodeRecLen+Len*sizeof(UnicodeChar));
    if block=Nil then
      UnicodeStringError
    else
      begin
        PUnicodeRec(block)^.Ref:=1;      { Initial Refcount }
        PUnicodeRec(block)^.Len:=Len*2;  { Initial length }
        PUnicodeRec(block)^.First:=#0;   { Terminating #0 }
        inc(block,UnicodeFirstOff);      { Points to string now }
      end;
    NewUnicodeString:=block;
  end;
  Procedure DisposeUnicodeString(Var S : Pointer);
  {
    Releases the heap block of a UnicodeString.
    S points to the character data; it is moved back to the start of the
    header before FreeMem and set to nil afterwards. A nil S is ignored.
  }
  begin
    if S<>Nil then
      begin
        Dec (S,UnicodeFirstOff);
        Freemem(S);
        S:=Nil;
      end;
  end;
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
  {
    Drops one reference from the unicodestring S points to.
    Nil pointers and strings with a negative reference count (constants)
    are left untouched. When the count reaches zero the string is released
    through DisposeUnicodeString, which also sets S to nil.
  }
  Var
    header : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        header:=PUnicodeRec(S-UnicodeFirstOff);
        { constant strings carry a negative count and are never released }
        if header^.Ref>=0 then
          { declocked does a MT safe dec and returns true, if the counter is 0 }
          if declocked(header^.Ref) then
            DisposeUnicodeString(S);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
  Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
  {
    Adds one reference to the unicodestring S points to.
    Nil pointers and strings with a negative reference count (constants)
    are left untouched.
  }
  Var
    header : PUnicodeRec;
  Begin
    if S<>Nil then
      begin
        header:=PUnicodeRec(S-UnicodeFirstOff);
        { constant string ? then there is nothing to count }
        if header^.Ref>=0 then
          inclocked(header^.Ref);
      end;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
  {
    Converts a UnicodeString to a ShortString.
    At most high_of_res characters of S2 are passed to the installed
    Unicode2AnsiMoveProc; an empty S2 gives an empty result.
  }
  Var
    count     : SizeInt;
    converted : ansistring;
  begin
    result:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    if count>high_of_res then
      count:=high_of_res;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),converted,count);
    result:=converted;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  {
    Converts a UnicodeString to a ShortString.
    At most high(res) characters of S2 are passed to the installed
    Unicode2AnsiMoveProc; an empty S2 gives an empty res.
  }
  Var
    count     : SizeInt;
    converted : ansistring;
  begin
    res:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    if count>high(res) then
      count:=high(res);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),converted,count);
    res:=converted;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  {
    Converts a ShortString to a UnicodeString.
    The Length(S2) bytes of S2 are handed to the installed
    Ansi2UnicodeMoveProc, which fills the result. An empty S2 gives an
    empty result.
  }
  Var
    Size : SizeInt;
  begin
    result:='';
    Size:=Length(S2);
    if Size>0 then
      begin
        widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),result,Size);
        { Terminating Zero.
          It is placed behind the characters the manager actually produced.
          The move procedure can be replaced, so the result need not hold
          exactly Size characters; using Size as offset could then write
          outside the string or cut it short. A nil result is left alone. }
        if Pointer(result)<>nil then
          PUnicodeChar(Pointer(result)+Length(result)*sizeof(UnicodeChar))^:=#0;
      end;
  end;
  Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString): AnsiString; compilerproc;
  {
    Converts a UnicodeString to an AnsiString through the installed
    Unicode2AnsiMoveProc. An empty S2 gives an empty result.
  }
  Var
    count : SizeInt;
  begin
    result:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,count);
  end;
  Function fpc_AnsiStr_To_UnicodeStr (Const S2 : AnsiString): UnicodeString; compilerproc;
  {
    Converts an AnsiString to a UnicodeString through the installed
    Ansi2UnicodeMoveProc. An empty S2 gives an empty result.
  }
  Var
    count : SizeInt;
  begin
    result:='';
    count:=Length(S2);
    if count<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),result,count);
  end;
  Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  { Copies the characters of S2 unchanged into a WideString of the same length. }
  var
    count : SizeInt;
  begin
    count:=Length(S2);
    SetLength(Result,count);
    Move(pointer(S2)^,Pointer(Result)^,count*sizeof(WideChar));
  end;
  Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  { Copies the characters of S2 unchanged into a UnicodeString of the same length. }
  var
    count : SizeInt;
  begin
    count:=Length(S2);
    SetLength(Result,count);
    Move(pointer(S2)^,Pointer(Result)^,count*sizeof(WideChar));
  end;
  Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar): ansistring; compilerproc;
  {
    Converts the zero terminated UnicodeChar sequence at p to an AnsiString
    through the installed Unicode2AnsiMoveProc.
    A nil p or an immediately terminated sequence gives an empty result.
  }
  var
    count : SizeInt;
  begin
    result:='';
    if p<>nil then
      begin
        { position of the terminating zero = number of characters }
        count:=IndexWord(p^, -1, 0);
        if count>0 then
          widestringmanager.Unicode2AnsiMoveProc(P,result,count);
      end;
  end;
  Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
  {
    Copies the zero terminated UnicodeChar sequence at p into a new
    UnicodeString. A nil p gives an empty result.
  }
  var
    count : SizeInt;
    dst   : PUnicodeChar;
  begin
    result:='';
    if p=nil then
      exit;
    { position of the terminating zero = number of characters }
    count:=IndexWord(p^, -1, 0);
    Setlength(result,count);
    if count>0 then
      begin
        dst:=PUnicodeChar(Pointer(result));
        Move(p^,dst^,count*sizeof(UnicodeChar));
        { Terminating Zero }
        PUnicodeChar(Pointer(dst)+count*sizeof(UnicodeChar))^:=#0;
      end;
  end;
  Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
  {
    Copies the zero terminated WideChar sequence at p into a new
    UnicodeString. A nil p gives an empty result.
  }
  var
    count : SizeInt;
    dst   : PUnicodeChar;
  begin
    result:='';
    if p=nil then
      exit;
    { position of the terminating zero = number of characters }
    count:=IndexWord(p^, -1, 0);
    Setlength(result,count);
    if count>0 then
      begin
        dst:=PUnicodeChar(Pointer(result));
        Move(p^,dst^,count*sizeof(UnicodeChar));
        { Terminating Zero }
        PUnicodeChar(Pointer(dst)+count*sizeof(UnicodeChar))^:=#0;
      end;
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
  {
    Converts the zero terminated UnicodeChar sequence at p to a ShortString
    through the installed Unicode2AnsiMoveProc.
    A nil p or an immediately terminated sequence gives an empty result.
  }
  var
    count     : SizeInt;
    converted : ansistring;
  begin
    result:='';
    if p<>nil then
      begin
        count:=IndexWord(p^, $7fffffff, 0);
        if count>0 then
          begin
            widestringmanager.Unicode2AnsiMoveProc(p,converted,count);
            result:=converted;
          end;
      end;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
  {
    Converts the zero terminated UnicodeChar sequence at p to a ShortString
    through the installed Unicode2AnsiMoveProc.
    A nil p or an immediately terminated sequence gives an empty res.
  }
  var
    count     : SizeInt;
    converted : ansistring;
  begin
    res:='';
    if p<>nil then
      begin
        count:=IndexWord(p^, high(PtrInt), 0);
        if count>0 then
          begin
            widestringmanager.Unicode2AnsiMoveProc(p,converted,count);
            res:=converted;
          end;
      end;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  { checked against the ansistring routine, 2001-05-27 (FK) }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
  {
    Performs S1:=S2 on the raw string pointers while keeping the
    reference counts in step: S2 gains a reference (only when its count is
    positive), then the old value of S1 loses one, then the pointer is copied.
  }
  var
    incoming : PUnicodeRec;
  begin
    if S2<>nil then
      begin
        incoming:=PUnicodeRec(S2-UnicodeFirstOff);
        if incoming^.Ref>0 then
          inclocked(incoming^.ref);
      end;
    { Decrease the reference count on the old S1 }
    fpc_unicodestr_decr_ref (S1);
    s1:=s2;
  end;

  { alias for internal use }
  Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
  363. {$ifndef STR_CONCAT_PROCS}
  function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  {
    Returns S1 followed by S2.
    When one operand is empty the other one is assigned directly;
    otherwise a string of the combined length is built and both operands
    are copied into it, S2 together with its terminating zero.
  }
  Var
    len1,len2 : SizeInt;
    dst       : punicodechar;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      result:=s2
    else if (S2='') then
      result:=s1
    else
      begin
        len1:=Length(S1);
        len2:=length(S2);
        SetLength(result,len1+len2);
        dst:=punicodechar(result);
        Move(S1[1],dst^,len1*sizeof(UnicodeChar));
        inc(dst,len1);
        { len2+1: the terminating zero of S2 is copied as well }
        Move(S2[1],dst^,(len2+1)*sizeof(UnicodeChar));
      end;
  end;
  function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  {
    Returns the concatenation of all elements of sarr.
    The total length is summed first so that the result is sized with a
    single SetLength; empty (nil) elements are skipped while copying.
  }
  Var
    idx         : Longint;
    src         : pointer;
    dst         : punicodechar;
    chunk,total : SizeInt;
  begin
    total:=0;
    for idx:=low(sarr) to high(sarr) do
      inc(total,length(sarr[idx]));
    SetLength(result,total);
    dst:=punicodechar(result);
    for idx:=low(sarr) to high(sarr) do
      begin
        src:=pointer(sarr[idx]);
        if not assigned(src) then
          continue;
        chunk:=length(unicodestring(src));
        { chunk+1: the terminating zero travels along and is overwritten
          by the next element, if any }
        Move(punicodechar(src)^,dst^,(chunk+1)*sizeof(UnicodeChar));
        inc(dst,chunk);
      end;
  end;
  413. {$else STR_CONCAT_PROCS}
  procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  {
    Stores S1 followed by S2 in DestS.
    When one operand is empty the other one is assigned directly.
    DestS may be the same string as S1 and/or S2; those cases are handled
    by resizing DestS and moving the data inside it.
  }
  Var
    len1,len2  : SizeInt;
    selfappend : boolean;
    tail       : Pointer;
  begin
    { only assign if s1 or s2 is empty }
    if (S1='') then
      begin
        DestS:=s2;
        exit;
      end;
    if (S2='') then
      begin
        DestS:=s1;
        exit;
      end;
    len1:=Length(S1);
    len2:=length(S2);
    { Use Pointer() typecasts to prevent extra conversion code }
    if Pointer(DestS)=Pointer(S1) then
      begin
        { DestS:=DestS+S2; remember beforehand whether S2 is DestS too }
        selfappend:=Pointer(S1)=Pointer(S2);
        SetLength(DestS,len1+len2);
        tail:=Pointer(DestS)+len1*sizeof(UnicodeChar);
        if selfappend then
          Move(Pointer(DestS)^,tail^,(len2)*sizeof(UnicodeChar))
        else
          Move(Pointer(S2)^,tail^,(len2+1)*sizeof(UnicodeChar));
      end
    else if Pointer(DestS)=Pointer(S2) then
      begin
        { DestS:=S1+DestS; shift the old contents up, then put S1 in front }
        SetLength(DestS,len1+len2);
        tail:=Pointer(DestS)+len1*sizeof(UnicodeChar);
        Move(Pointer(DestS)^,tail^,(len2+1)*sizeof(UnicodeChar));
        Move(Pointer(S1)^,Pointer(DestS)^,len1*sizeof(UnicodeChar));
      end
    else
      begin
        { DestS is unrelated to both operands }
        DestS:='';
        SetLength(DestS,len1+len2);
        tail:=Pointer(DestS)+len1*sizeof(UnicodeChar);
        Move(Pointer(S1)^,Pointer(DestS)^,len1*sizeof(UnicodeChar));
        Move(Pointer(S2)^,tail^,(len2+1)*sizeof(UnicodeChar));
      end;
  end;
  procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  {
    Stores the concatenation of all elements of sarr in DestS.
    DestS may itself be one of the elements:
      - if it is only the first element, the other elements are appended
        to it in place;
      - if it occurs at a later position, an extra reference to its old
        value is held in destcopy while DestS is rebuilt from scratch.
    An empty array gives an empty DestS.
  }
  Var
    i           : Longint;
    p,pc        : pointer;
    Size,NewLen : SizeInt;
    lowstart    : longint;
    destcopy    : pointer;
    OldDestLen  : SizeInt;
  begin
    { Nothing to concatenate.
      The test has to be for an empty array (high<low): testing for
      high(sarr)=0 would throw away the only element of a one-element
      array and would still let an empty array reach sarr[lowstart] below. }
    if high(sarr)<low(sarr) then
      begin
        DestS:='';
        exit;
      end;
    destcopy:=nil;
    lowstart:=low(sarr);
    { DestS:=DestS+...: the first element is already in place }
    if Pointer(DestS)=Pointer(sarr[lowstart]) then
      inc(lowstart);
    { Check for another reuse, then we can't use
      the append optimization }
    for i:=lowstart to high(sarr) do
      begin
        if Pointer(DestS)=Pointer(sarr[i]) then
          begin
            { if DestS is used somewhere in the middle of the expression,
              we need to make sure the original string still exists after
              we empty/modify DestS.
              This trick only works with reference counted strings. Therefor
              this optimization is disabled for WINLIKEUNICODESTRING }
            destcopy:=pointer(dests);
            fpc_UnicodeStr_Incr_Ref(destcopy);
            lowstart:=low(sarr);
            break;
          end;
      end;
    { Start with empty DestS if we start with concatting
      the first array element }
    if lowstart=low(sarr) then
      DestS:='';
    OldDestLen:=length(DestS);
    { Calculate size of the result so we can do
      a single call to SetLength() }
    NewLen:=0;
    for i:=low(sarr) to high(sarr) do
      inc(NewLen,length(sarr[i]));
    SetLength(DestS,NewLen);
    { Concat all strings, except the string we already
      copied in DestS }
    pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
    for i:=lowstart to high(sarr) do
      begin
        p:=pointer(sarr[i]);
        if assigned(p) then
          begin
            Size:=length(unicodestring(p));
            { Size+1: the terminating zero is copied along }
            Move(p^,pc^,(Size+1)*sizeof(UnicodeChar));
            inc(pc,size*sizeof(UnicodeChar));
          end;
      end;
    { release the extra reference taken above; no-op when destcopy is nil }
    fpc_UnicodeStr_Decr_Ref(destcopy);
  end;
  517. {$endif STR_CONCAT_PROCS}
  Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
  { Converts a single Char through the installed Ansi2UnicodeMoveProc and
    returns the first character of the converted string. }
  var
    converted : unicodestring;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(@c,converted,1);
    result:=converted[1];
  end;
  Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
  {
    Builds a UnicodeString of length 1 whose only character is c.
  }
  begin
    SetLength(result,1);
    result[1]:=c;
    { Terminating Zero }
    PUnicodeChar(Pointer(result)+sizeof(UnicodeChar))^:=#0;
  end;
  Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
  {
    Converts a single UnicodeChar through the installed
    Unicode2AnsiMoveProc. When the conversion does not give exactly one
    char, '?' is returned.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,converted,1);
    if length(converted)<>1 then
      result:='?'
    else
      result:=converted[1];
  end;
  Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  {
    Builds a UnicodeString of length 1 whose only character is c.
  }
  begin
    SetLength(fpc_WChar_To_UnicodeStr,1);
    fpc_WChar_To_UnicodeStr[1]:=c;
  end;
  Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  {
    Builds a UnicodeString of length 1 whose only character is c.
  }
  begin
    SetLength(result,1);
    result[1]:=c;
  end;
  Function fpc_UChar_To_AnsiStr(const c : UnicodeChar): AnsiString; compilerproc;
  {
    Converts a single UnicodeChar to an AnsiString through the installed
    Unicode2AnsiMoveProc, which fills the result directly.
  }
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,result,1);
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  {
    Converts a single UnicodeChar to a ShortString: the char goes through
    the installed Unicode2AnsiMoveProc into a temporary ansistring, which
    is then assigned to the result.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,converted,1);
    result:=converted;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  {
    Converts a single UnicodeChar to a ShortString: the char goes through
    the installed Unicode2AnsiMoveProc into a temporary ansistring, which
    is then assigned to res.
  }
  var
    converted : ansistring;
  begin
    widestringmanager.Unicode2AnsiMoveProc(@c,converted,1);
    res:=converted;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  {
    Converts the zero terminated char sequence at p to a UnicodeString
    through the installed Ansi2UnicodeMoveProc.
    A nil p or an immediately terminated sequence gives an empty result.
  }
  Var
    count : SizeInt;
  begin
    if assigned(p) and (p[0]<>#0) then
      begin
        { position of the terminating zero = number of chars }
        count:=IndexChar(p^,-1,#0);
        widestringmanager.Ansi2UnicodeMoveProc(P,result,count);
      end
    else
      result:='';
  end;
  Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Converts a char array to a UnicodeString through the installed
    Ansi2UnicodeMoveProc.
    With zerobased set, conversion stops at the first #0 in the array (an
    array starting with #0 gives an empty result); otherwise, or when no
    #0 is found, the whole array is converted.
  }
  var
    count : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        if arr[0]=#0 then
          begin
            result:='';
            exit;
          end;
        count:=IndexChar(arr,high(arr)+1,#0);
        if count=-1 then
          count:=high(arr)+1;
      end;
    SetLength(result,count);
    widestringmanager.Ansi2UnicodeMoveProc (pchar(@arr),result,count);
  end;
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a unicodechar array to a ShortString through the installed
    Unicode2AnsiMoveProc.
    At most 255 elements are considered; with zerobased set, conversion
    stops at the first zero element within that range.
  }
  var
    limit     : longint;
    zeropos   : longint;
    count     : byte;
    converted : ansistring;
  begin
    { clamp the element count to what a shortstring can hold }
    limit:=high(arr)+1;
    if limit>=256 then
      limit:=255
    else if limit<0 then
      limit:=0;
    count:=limit;
    if zerobased then
      begin
        zeropos:=IndexWord(arr[0],limit,0);
        if zeropos>=0 then
          count:=zeropos;
      end;
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),converted,count);
    result:=converted;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a unicodechar array to a ShortString through the installed
    Unicode2AnsiMoveProc.
    At most high(res) elements are considered; with zerobased set,
    conversion stops at the first zero element within that range.
  }
  var
    limit     : longint;
    zeropos   : ptrint;
    count     : byte;
    converted : ansistring;
  begin
    { clamp the element count to the capacity of res }
    limit:=high(arr)+1;
    if limit>=high(res)+1 then
      limit:=high(res)
    else if limit<0 then
      limit:=0;
    count:=limit;
    if zerobased then
      begin
        zeropos:=IndexWord(arr[0],limit,0);
        if zeropos>=0 then
          count:=zeropos;
      end;
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),converted,count);
    res:=converted;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; zerobased: boolean = true): AnsiString; compilerproc;
  {
    Converts a unicodechar array to an AnsiString through the installed
    Unicode2AnsiMoveProc.
    With zerobased set, conversion stops at the first zero element;
    otherwise, or when no zero is found, the whole array is converted.
  }
  var
    count : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        count:=IndexWord(arr,high(arr)+1,0);
        if count=-1 then
          count:=high(arr)+1;
      end;
    SetLength(result,count);
    widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),result,count);
  end;
  Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a unicodechar array into a UnicodeString.
    With zerobased set, copying stops at the first zero element;
    otherwise, or when no zero is found, the whole array is copied.
  }
  var
    count : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        count:=IndexWord(arr,high(arr)+1,0);
        if count=-1 then
          count:=high(arr)+1;
      end;
    SetLength(result,count);
    Move(arr[0],Pointer(result)^,count*sizeof(UnicodeChar));
  end;
  Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  {
    Copies a widechar array into a UnicodeString.
    With zerobased set, copying stops at the first zero element;
    otherwise, or when no zero is found, the whole array is copied.
  }
  var
    count : SizeInt;
  begin
    count:=high(arr)+1;
    if zerobased then
      begin
        count:=IndexWord(arr,high(arr)+1,0);
        if count=-1 then
          count:=high(arr)+1;
      end;
    SetLength(result,count);
    Move(arr[0],Pointer(result)^,count*sizeof(WideChar));
  end;
  724. { due to their names, the following procedures should be in wstrings.inc,
  725. however, the compiler generates code using these functions on all platforms }
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a widechar array to a ShortString through the installed
    Wide2AnsiMoveProc.
    At most 255 elements are considered; with zerobased set, conversion
    stops at the first zero element within that range.
  }
  var
    limit     : longint;
    zeropos   : longint;
    count     : byte;
    converted : ansistring;
  begin
    { clamp the element count to what a shortstring can hold }
    limit:=high(arr)+1;
    if limit>=256 then
      limit:=255
    else if limit<0 then
      limit:=0;
    count:=limit;
    if zerobased then
      begin
        zeropos:=IndexWord(arr[0],limit,0);
        if zeropos>=0 then
          count:=zeropos;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),converted,count);
    result:=converted;
  end;
  {$else FPC_STRTOSHORTSTRINGPROC}
  procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  {
    Converts a widechar array to a ShortString through the installed
    Wide2AnsiMoveProc.
    At most high(res) elements are considered; with zerobased set,
    conversion stops at the first zero element within that range.
  }
  var
    limit     : longint;
    zeropos   : ptrint;
    count     : byte;
    converted : ansistring;
  begin
    { clamp the element count to the capacity of res }
    limit:=high(arr)+1;
    if limit>=high(res)+1 then
      limit:=high(res)
    else if limit<0 then
      limit:=0;
    count:=limit;
    if zerobased then
      begin
        zeropos:=IndexWord(arr[0],limit,0);
        if zeropos>=0 then
          count:=zeropos;
      end;
    widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),converted,count);
    res:=converted;
  end;
  {$endif FPC_STRTOSHORTSTRINGPROC}
  779. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; zerobased: boolean = true): AnsiString; compilerproc;
  780. var
  781. i : SizeInt;
  782. begin
  783. if (zerobased) then
  784. begin
  785. i:=IndexWord(arr,high(arr)+1,0);
  786. if i = -1 then
  787. i := high(arr)+1;
  788. end
  789. else
  790. i := high(arr)+1;
  791. SetLength(fpc_WideCharArray_To_AnsiStr,i);
  792. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,i);
  793. end;
  794. Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  795. var
  796. i : SizeInt;
  797. begin
  798. if (zerobased) then
  799. begin
  800. i:=IndexWord(arr,high(arr)+1,0);
  801. if i = -1 then
  802. i := high(arr)+1;
  803. end
  804. else
  805. i := high(arr)+1;
  806. SetLength(fpc_WideCharArray_To_WideStr,i);
  807. Move(arr[0], Pointer(fpc_WideCharArray_To_WideStr)^,i*sizeof(WideChar));
  808. end;
  809. {$ifndef FPC_STRTOCHARARRAYPROC}
  810. { inside the compiler, the resulttype is modified to that of the actual }
  811. { chararray we're converting to (JM) }
  812. function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
  813. var
  814. len: SizeInt;
  815. temp: ansistring;
  816. begin
  817. len := length(src);
  818. { make sure we don't dereference src if it can be nil (JM) }
  819. if len > 0 then
  820. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,len);
  821. len := length(temp);
  822. if len > arraysize then
  823. len := arraysize;
  824. {$r-}
  825. move(temp[1],fpc_unicodestr_to_chararray[0],len);
  826. fillchar(fpc_unicodestr_to_chararray[len],arraysize-len,0);
  827. {$ifdef RangeCheckWasOn}
  828. {$r+}
  829. {$endif}
  830. end;
  831. { inside the compiler, the resulttype is modified to that of the actual }
  832. { unicodechararray we're converting to (JM) }
  833. function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
  834. var
  835. len: SizeInt;
  836. begin
  837. len := length(src);
  838. if len > arraysize then
  839. len := arraysize;
  840. {$r-}
  841. { make sure we don't try to access element 1 of the ansistring if it's nil }
  842. if len > 0 then
  843. move(src[1],fpc_unicodestr_to_unicodechararray[0],len*SizeOf(UnicodeChar));
  844. fillchar(fpc_unicodestr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  845. {$ifdef RangeCheckWasOn}
  846. {$r+}
  847. {$endif}
  848. end;
{ inside the compiler, the resulttype is modified to that of the actual }
{ unicodechararray we're converting to (JM) }
  851. function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
  852. var
  853. len: SizeInt;
  854. temp: unicodestring;
  855. begin
  856. len := length(src);
  857. { make sure we don't dereference src if it can be nil (JM) }
  858. if len > 0 then
  859. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  860. len := length(temp);
  861. if len > arraysize then
  862. len := arraysize;
  863. {$r-}
  864. move(temp[1],fpc_ansistr_to_unicodechararray[0],len*sizeof(unicodechar));
  865. fillchar(fpc_ansistr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  866. {$ifdef RangeCheckWasOn}
  867. {$r+}
  868. {$endif}
  869. end;
  870. function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
  871. var
  872. len: longint;
  873. temp : unicodestring;
  874. begin
  875. len := length(src);
  876. { make sure we don't access char 1 if length is 0 (JM) }
  877. if len > 0 then
  878. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  879. len := length(temp);
  880. if len > arraysize then
  881. len := arraysize;
  882. {$r-}
  883. move(temp[1],fpc_shortstr_to_unicodechararray[0],len*sizeof(unicodechar));
  884. fillchar(fpc_shortstr_to_unicodechararray[len],(arraysize-len)*SizeOf(UnicodeChar),0);
  885. {$ifdef RangeCheckWasOn}
  886. {$r+}
  887. {$endif}
  888. end;
  889. {$else ndef FPC_STRTOCHARARRAYPROC}
  890. procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
  891. var
  892. len: SizeInt;
  893. temp: ansistring;
  894. begin
  895. len := length(src);
  896. { make sure we don't dereference src if it can be nil (JM) }
  897. if len > 0 then
  898. widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),temp,len);
  899. len := length(temp);
  900. if len > length(res) then
  901. len := length(res);
  902. {$r-}
  903. move(temp[1],res[0],len);
  904. fillchar(res[len],length(res)-len,0);
  905. {$ifdef RangeCheckWasOn}
  906. {$r+}
  907. {$endif}
  908. end;
  909. procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  910. var
  911. len: SizeInt;
  912. begin
  913. len := length(src);
  914. if len > length(res) then
  915. len := length(res);
  916. {$r-}
  917. { make sure we don't try to access element 1 of the ansistring if it's nil }
  918. if len > 0 then
  919. move(src[1],res[0],len*SizeOf(UnicodeChar));
  920. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  921. {$ifdef RangeCheckWasOn}
  922. {$r+}
  923. {$endif}
  924. end;
  925. procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  926. var
  927. len: SizeInt;
  928. temp: unicodestring;
  929. begin
  930. len := length(src);
  931. { make sure we don't dereference src if it can be nil (JM) }
  932. if len > 0 then
  933. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  934. len := length(temp);
  935. if len > length(res) then
  936. len := length(res);
  937. {$r-}
  938. move(temp[1],res[0],len*sizeof(unicodechar));
  939. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  940. {$ifdef RangeCheckWasOn}
  941. {$r+}
  942. {$endif}
  943. end;
  944. procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  945. var
  946. len: longint;
  947. temp : unicodestring;
  948. begin
  949. len := length(src);
  950. { make sure we don't access char 1 if length is 0 (JM) }
  951. if len > 0 then
  952. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  953. len := length(temp);
  954. if len > length(res) then
  955. len := length(res);
  956. {$r-}
  957. move(temp[1],res[0],len*sizeof(unicodechar));
  958. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  959. {$ifdef RangeCheckWasOn}
  960. {$r+}
  961. {$endif}
  962. end;
  963. {$endif ndef FPC_STRTOCHARARRAYPROC}
  964. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
  965. {
  966. Compares 2 UnicodeStrings;
  967. The result is
  968. <0 if S1<S2
  969. 0 if S1=S2
  970. >0 if S1>S2
  971. }
  972. Var
  973. MaxI,Temp : SizeInt;
  974. begin
  975. if pointer(S1)=pointer(S2) then
  976. begin
  977. fpc_UnicodeStr_Compare:=0;
  978. exit;
  979. end;
  980. Maxi:=Length(S1);
  981. temp:=Length(S2);
  982. If MaxI>Temp then
  983. MaxI:=Temp;
  984. Temp:=CompareWord(S1[1],S2[1],MaxI);
  985. if temp=0 then
  986. temp:=Length(S1)-Length(S2);
  987. fpc_UnicodeStr_Compare:=Temp;
  988. end;
  989. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
  990. {
  991. Compares 2 UnicodeStrings for equality only;
  992. The result is
  993. 0 if S1=S2
  994. <>0 if S1<>S2
  995. }
  996. Var
  997. MaxI : SizeInt;
  998. begin
  999. if pointer(S1)=pointer(S2) then
  1000. exit(0);
  1001. Maxi:=Length(S1);
  1002. If MaxI<>Length(S2) then
  1003. exit(-1)
  1004. else
  1005. exit(CompareWord(S1[1],S2[1],MaxI));
  1006. end;
  1007. Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
  1008. begin
  1009. if p=nil then
  1010. HandleErrorFrame(201,get_frame);
  1011. end;
  1012. Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
  1013. begin
  1014. if (index>len div 2) or (Index<1) then
  1015. HandleErrorFrame(201,get_frame);
  1016. end;
  1017. Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
  1018. {
  1019. Sets The length of string S to L.
  1020. Makes sure S is unique, and contains enough room.
  1021. }
  1022. Var
  1023. Temp : Pointer;
  1024. movelen: SizeInt;
  1025. begin
  1026. if (l>0) then
  1027. begin
  1028. if Pointer(S)=nil then
  1029. begin
  1030. { Need a complete new string...}
  1031. Pointer(s):=NewUnicodeString(l);
  1032. end
  1033. { windows doesn't support reallocing unicodestrings, this code
  1034. is anyways subject to be removed because unicodestrings shouldn't be
  1035. ref. counted anymore (FK) }
  1036. else
  1037. if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
  1038. begin
  1039. Dec(Pointer(S),UnicodeFirstOff);
  1040. if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
  1041. reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
  1042. Inc(Pointer(S), UnicodeFirstOff);
  1043. end
  1044. else
  1045. begin
  1046. { Reallocation is needed... }
  1047. Temp:=Pointer(NewUnicodeString(L));
  1048. if Length(S)>0 then
  1049. begin
  1050. if l < succ(length(s)) then
  1051. movelen := l
  1052. { also move terminating null }
  1053. else
  1054. movelen := succ(length(s));
  1055. Move(Pointer(S)^,Temp^,movelen * Sizeof(UnicodeChar));
  1056. end;
  1057. fpc_unicodestr_decr_ref(Pointer(S));
  1058. Pointer(S):=Temp;
  1059. end;
  1060. { Force nil termination in case it gets shorter }
  1061. PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
  1062. PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l*sizeof(UnicodeChar);
  1063. end
  1064. else
  1065. begin
  1066. { Length=0 }
  1067. if Pointer(S)<>nil then
  1068. fpc_unicodestr_decr_ref (Pointer(S));
  1069. Pointer(S):=Nil;
  1070. end;
  1071. end;
  1072. {*****************************************************************************
  1073. Public functions, In interface.
  1074. *****************************************************************************}
  1075. function UnicodeCharToString(S : PUnicodeChar) : AnsiString;
  1076. begin
  1077. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  1078. end;
  1079. function StringToUnicodeChar(const Src : AnsiString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  1080. var
  1081. temp:unicodestring;
  1082. begin
  1083. widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),temp,Length(Src));
  1084. if Length(temp)<DestSize then
  1085. move(temp[1],Dest^,Length(temp)*SizeOf(UnicodeChar))
  1086. else
  1087. move(temp[1],Dest^,(DestSize-1)*SizeOf(UnicodeChar));
  1088. Dest[DestSize-1]:=#0;
  1089. result:=Dest;
  1090. end;
  1091. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : AnsiString;
  1092. begin
  1093. //SetLength(result,Len);
  1094. widestringmanager.Unicode2AnsiMoveproc(S,result,Len);
  1095. end;
  1096. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  1097. begin
  1098. Dest:=UnicodeCharLenToString(Src,Len);
  1099. end;
  1100. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  1101. begin
  1102. Dest:=UnicodeCharToString(S);
  1103. end;
  1104. Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
  1105. {
  1106. Make sure reference count of S is 1,
  1107. using copy-on-write semantics.
  1108. }
  1109. Var
  1110. SNew : Pointer;
  1111. L : SizeInt;
  1112. begin
  1113. pointer(result) := pointer(s);
  1114. If Pointer(S)=Nil then
  1115. exit;
  1116. if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
  1117. begin
  1118. L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len div sizeof(UnicodeChar);
  1119. SNew:=NewUnicodeString (L);
  1120. Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
  1121. PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L * sizeof(UnicodeChar);
  1122. fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
  1123. pointer(S):=SNew;
  1124. pointer(result):=SNew;
  1125. end;
  1126. end;
  1127. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  1128. var
  1129. ResultAddress : Pointer;
  1130. begin
  1131. ResultAddress:=Nil;
  1132. dec(index);
  1133. if Index < 0 then
  1134. Index := 0;
  1135. { Check Size. Accounts for Zero-length S, the double check is needed because
  1136. Size can be maxint and will get <0 when adding index }
  1137. if (Size>Length(S)) or
  1138. (Index+Size>Length(S)) then
  1139. Size:=Length(S)-Index;
  1140. If Size>0 then
  1141. begin
  1142. If Index<0 Then
  1143. Index:=0;
  1144. ResultAddress:=Pointer(NewUnicodeString (Size));
  1145. if ResultAddress<>Nil then
  1146. begin
  1147. Move (PUnicodeChar(S)[Index],ResultAddress^,Size*sizeof(UnicodeChar));
  1148. PUnicodeRec(ResultAddress-UnicodeFirstOff)^.Len:=Size*sizeof(UnicodeChar);
  1149. PUnicodeChar(ResultAddress+Size*sizeof(UnicodeChar))^:=#0;
  1150. end;
  1151. end;
  1152. fpc_unicodestr_decr_ref(Pointer(fpc_unicodestr_copy));
  1153. Pointer(fpc_unicodestr_Copy):=ResultAddress;
  1154. end;
  1155. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  1156. var
  1157. i,MaxLen : SizeInt;
  1158. pc : punicodechar;
  1159. begin
  1160. Pos:=0;
  1161. if Length(SubStr)>0 then
  1162. begin
  1163. MaxLen:=Length(source)-Length(SubStr);
  1164. i:=0;
  1165. pc:=@source[1];
  1166. while (i<=MaxLen) do
  1167. begin
  1168. inc(i);
  1169. if (SubStr[1]=pc^) and
  1170. (CompareWord(Substr[1],pc^,Length(SubStr))=0) then
  1171. begin
  1172. Pos:=i;
  1173. exit;
  1174. end;
  1175. inc(pc);
  1176. end;
  1177. end;
  1178. end;
  1179. { Faster version for a unicodechar alone }
  1180. Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  1181. var
  1182. i: SizeInt;
  1183. pc : punicodechar;
  1184. begin
  1185. pc:=@s[1];
  1186. for i:=1 to length(s) do
  1187. begin
  1188. if pc^=c then
  1189. begin
  1190. pos:=i;
  1191. exit;
  1192. end;
  1193. inc(pc);
  1194. end;
  1195. pos:=0;
  1196. end;
  1197. Function Pos (c : AnsiString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1198. begin
  1199. result:=Pos(UnicodeString(c),s);
  1200. end;
  1201. Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1202. begin
  1203. result:=Pos(UnicodeString(c),s);
  1204. end;
  1205. Function Pos (c : UnicodeString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1206. begin
  1207. result:=Pos(c,UnicodeString(s));
  1208. end;
  1209. { Faster version for a char alone. Must be implemented because }
  1210. { pos(c: char; const s: shortstring) also exists, so otherwise }
  1211. { using pos(char,pchar) will always call the shortstring version }
  1212. { (exact match for first argument), also with $h+ (JM) }
  1213. Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
  1214. var
  1215. i: SizeInt;
  1216. wc : unicodechar;
  1217. pc : punicodechar;
  1218. begin
  1219. wc:=c;
  1220. pc:=@s[1];
  1221. for i:=1 to length(s) do
  1222. begin
  1223. if pc^=wc then
  1224. begin
  1225. pos:=i;
  1226. exit;
  1227. end;
  1228. inc(pc);
  1229. end;
  1230. pos:=0;
  1231. end;
  1232. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  1233. Var
  1234. LS : SizeInt;
  1235. begin
  1236. If Length(S)=0 then
  1237. exit;
  1238. if index<=0 then
  1239. exit;
  1240. LS:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len div sizeof(UnicodeChar);
  1241. if (Index<=LS) and (Size>0) then
  1242. begin
  1243. UniqueString (S);
  1244. if Size+Index>LS then
  1245. Size:=LS-Index+1;
  1246. if Index+Size<=LS then
  1247. begin
  1248. Dec(Index);
  1249. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  1250. end;
  1251. Setlength(s,LS-Size);
  1252. end;
  1253. end;
  1254. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  1255. var
  1256. Temp : UnicodeString;
  1257. LS : SizeInt;
  1258. begin
  1259. If Length(Source)=0 then
  1260. exit;
  1261. if index <= 0 then
  1262. index := 1;
  1263. Ls:=Length(S);
  1264. if index > LS then
  1265. index := LS+1;
  1266. Dec(Index);
  1267. Pointer(Temp) := NewUnicodeString(Length(Source)+LS);
  1268. SetLength(Temp,Length(Source)+LS);
  1269. If Index>0 then
  1270. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  1271. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  1272. If (LS-Index)>0 then
  1273. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  1274. S:=Temp;
  1275. end;
  1276. Function UpCase(c:UnicodeChar):UnicodeChar;
  1277. var
  1278. s : UnicodeString;
  1279. begin
  1280. s:=c;
  1281. result:=widestringmanager.UpperUnicodeStringProc(s)[1];
  1282. end;
  1283. function UpCase(const s : UnicodeString) : UnicodeString;
  1284. begin
  1285. result:=widestringmanager.UpperUnicodeStringProc(s);
  1286. end;
  1287. Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  1288. var
  1289. BufLen: SizeInt;
  1290. begin
  1291. SetLength(S,Len);
  1292. If (Buf<>Nil) and (Len>0) then
  1293. begin
  1294. BufLen := IndexWord(Buf^, Len+1, 0);
  1295. If (BufLen>0) and (BufLen < Len) then
  1296. Len := BufLen;
  1297. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  1298. PUnicodeChar(Pointer(S)+Len*sizeof(UnicodeChar))^:=#0;
  1299. end;
  1300. end;
  1301. Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  1302. var
  1303. BufLen: SizeInt;
  1304. begin
  1305. SetLength(S,Len);
  1306. If (Buf<>Nil) and (Len>0) then
  1307. begin
  1308. BufLen := IndexByte(Buf^, Len+1, 0);
  1309. If (BufLen>0) and (BufLen < Len) then
  1310. Len := BufLen;
  1311. widestringmanager.Ansi2UnicodeMoveProc(Buf,S,Len);
  1312. //PUnicodeChar(Pointer(S)+Len*sizeof(UnicodeChar))^:=#0;
  1313. end;
  1314. end;
  1315. {$ifndef FPUNONE}
  1316. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  1317. Var
  1318. SS : String;
  1319. begin
  1320. fpc_Val_Real_UnicodeStr := 0;
  1321. if length(S) > 255 then
  1322. code := 256
  1323. else
  1324. begin
  1325. SS := S;
  1326. Val(SS,fpc_Val_Real_UnicodeStr,code);
  1327. end;
  1328. end;
  1329. {$endif}
  1330. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  1331. var ss:shortstring;
  1332. begin
  1333. if length(s)>255 then
  1334. code:=256
  1335. else
  1336. begin
  1337. ss:=s;
  1338. val(ss,fpc_val_enum_unicodestr,code);
  1339. end;
  1340. end;
  1341. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  1342. Var
  1343. SS : String;
  1344. begin
  1345. if length(S) > 255 then
  1346. begin
  1347. fpc_Val_Currency_UnicodeStr:=0;
  1348. code := 256;
  1349. end
  1350. else
  1351. begin
  1352. SS := S;
  1353. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  1354. end;
  1355. end;
  1356. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  1357. Var
  1358. SS : ShortString;
  1359. begin
  1360. fpc_Val_UInt_UnicodeStr := 0;
  1361. if length(S) > 255 then
  1362. code := 256
  1363. else
  1364. begin
  1365. SS := S;
  1366. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  1367. end;
  1368. end;
  1369. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  1370. Var
  1371. SS : ShortString;
  1372. begin
  1373. fpc_Val_SInt_UnicodeStr:=0;
  1374. if length(S)>255 then
  1375. code:=256
  1376. else
  1377. begin
  1378. SS := S;
  1379. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  1380. end;
  1381. end;
  1382. {$ifndef CPU64}
  1383. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  1384. Var
  1385. SS : ShortString;
  1386. begin
  1387. fpc_Val_qword_UnicodeStr:=0;
  1388. if length(S)>255 then
  1389. code:=256
  1390. else
  1391. begin
  1392. SS := S;
  1393. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  1394. end;
  1395. end;
  1396. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  1397. Var
  1398. SS : ShortString;
  1399. begin
  1400. fpc_Val_int64_UnicodeStr:=0;
  1401. if length(S)>255 then
  1402. code:=256
  1403. else
  1404. begin
  1405. SS := S;
  1406. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  1407. end;
  1408. end;
  1409. {$endif CPU64}
  1410. {$ifndef FPUNONE}
  1411. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  1412. var
  1413. ss : shortstring;
  1414. begin
  1415. str_real(len,fr,d,treal_type(rt),ss);
  1416. s:=ss;
  1417. end;
  1418. {$endif}
  1419. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1420. var ss:shortstring;
  1421. begin
  1422. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1423. s:=ss;
  1424. end;
  1425. {$ifdef FPC_HAS_STR_CURRENCY}
  1426. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1427. var
  1428. ss : shortstring;
  1429. begin
  1430. str(c:len:fr,ss);
  1431. s:=ss;
  1432. end;
  1433. {$endif FPC_HAS_STR_CURRENCY}
  1434. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1435. Var
  1436. SS : ShortString;
  1437. begin
  1438. Str (v:Len,SS);
  1439. S:=SS;
  1440. end;
  1441. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1442. Var
  1443. SS : ShortString;
  1444. begin
  1445. str(v:Len,SS);
  1446. S:=SS;
  1447. end;
  1448. {$ifndef CPU64}
  1449. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1450. Var
  1451. SS : ShortString;
  1452. begin
  1453. Str (v:Len,SS);
  1454. S:=SS;
  1455. end;
  1456. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1457. Var
  1458. SS : ShortString;
  1459. begin
  1460. str(v:Len,SS);
  1461. S:=SS;
  1462. end;
  1463. {$endif CPU64}
  1464. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1465. begin
  1466. if assigned(Source) then
  1467. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  1468. else
  1469. Result:=0;
  1470. end;
  1471. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1472. var
  1473. i,j : SizeUInt;
  1474. w : word;
  1475. begin
  1476. result:=0;
  1477. if source=nil then
  1478. exit;
  1479. i:=0;
  1480. j:=0;
  1481. if assigned(Dest) then
  1482. begin
  1483. while (i<SourceChars) and (j<MaxDestBytes) do
  1484. begin
  1485. w:=word(Source[i]);
  1486. case w of
  1487. 0..$7f:
  1488. begin
  1489. Dest[j]:=char(w);
  1490. inc(j);
  1491. end;
  1492. $80..$7ff:
  1493. begin
  1494. if j+1>=MaxDestBytes then
  1495. break;
  1496. Dest[j]:=char($c0 or (w shr 6));
  1497. Dest[j+1]:=char($80 or (w and $3f));
  1498. inc(j,2);
  1499. end;
  1500. else
  1501. begin
  1502. if j+2>=MaxDestBytes then
  1503. break;
  1504. Dest[j]:=char($e0 or (w shr 12));
  1505. Dest[j+1]:=char($80 or ((w shr 6)and $3f));
  1506. Dest[j+2]:=char($80 or (w and $3f));
  1507. inc(j,3);
  1508. end;
  1509. end;
  1510. inc(i);
  1511. end;
  1512. if j>SizeUInt(MaxDestBytes-1) then
  1513. j:=MaxDestBytes-1;
  1514. Dest[j]:=#0;
  1515. end
  1516. else
  1517. begin
  1518. while i<SourceChars do
  1519. begin
  1520. case word(Source[i]) of
  1521. $0..$7f:
  1522. inc(j);
  1523. $80..$7ff:
  1524. inc(j,2);
  1525. else
  1526. inc(j,3);
  1527. end;
  1528. inc(i);
  1529. end;
  1530. end;
  1531. result:=j+1;
  1532. end;
  1533. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1534. begin
  1535. if assigned(Source) then
  1536. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1537. else
  1538. Result:=0;
  1539. end;
  1540. function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1541. var
  1542. i,j : SizeUInt;
  1543. w: SizeUInt;
  1544. b : byte;
  1545. begin
  1546. if not assigned(Source) then
  1547. begin
  1548. result:=0;
  1549. exit;
  1550. end;
  1551. result:=SizeUInt(-1);
  1552. i:=0;
  1553. j:=0;
  1554. if assigned(Dest) then
  1555. begin
  1556. while (j<MaxDestChars) and (i<SourceBytes) do
  1557. begin
  1558. b:=byte(Source[i]);
  1559. w:=b;
  1560. inc(i);
  1561. // 2 or 3 bytes?
  1562. if b>=$80 then
  1563. begin
  1564. w:=b and $3f;
  1565. if i>=SourceBytes then
  1566. exit;
  1567. // 3 bytes?
  1568. if (b and $20)<>0 then
  1569. begin
  1570. b:=byte(Source[i]);
  1571. inc(i);
  1572. if i>=SourceBytes then
  1573. exit;
  1574. if (b and $c0)<>$80 then
  1575. exit;
  1576. w:=(w shl 6) or (b and $3f);
  1577. end;
  1578. b:=byte(Source[i]);
  1579. w:=(w shl 6) or (b and $3f);
  1580. if (b and $c0)<>$80 then
  1581. exit;
  1582. inc(i);
  1583. end;
  1584. Dest[j]:=UnicodeChar(w);
  1585. inc(j);
  1586. end;
  1587. if j>=MaxDestChars then j:=MaxDestChars-1;
  1588. Dest[j]:=#0;
  1589. end
  1590. else
  1591. begin
  1592. while i<SourceBytes do
  1593. begin
  1594. b:=byte(Source[i]);
  1595. inc(i);
  1596. // 2 or 3 bytes?
  1597. if b>=$80 then
  1598. begin
  1599. if i>=SourceBytes then
  1600. exit;
  1601. // 3 bytes?
  1602. b := b and $3f;
  1603. if (b and $20)<>0 then
  1604. begin
  1605. b:=byte(Source[i]);
  1606. inc(i);
  1607. if i>=SourceBytes then
  1608. exit;
  1609. if (b and $c0)<>$80 then
  1610. exit;
  1611. end;
  1612. if (byte(Source[i]) and $c0)<>$80 then
  1613. exit;
  1614. inc(i);
  1615. end;
  1616. inc(j);
  1617. end;
  1618. end;
  1619. result:=j+1;
  1620. end;
  1621. function UTF8Encode(const s : Ansistring) : UTF8String; inline;
  1622. begin
  1623. Result:=UTF8Encode(UnicodeString(s));
  1624. end;
  1625. function UTF8Encode(const s : UnicodeString) : UTF8String;
  1626. var
  1627. i : SizeInt;
  1628. hs : UTF8String;
  1629. begin
  1630. result:='';
  1631. if s='' then
  1632. exit;
  1633. SetLength(hs,length(s)*3);
  1634. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  1635. if i>0 then
  1636. begin
  1637. SetLength(hs,i-1);
  1638. result:=hs;
  1639. end;
  1640. end;
  1641. function UTF8Decode(const s : UTF8String): UnicodeString;
  1642. var
  1643. i : SizeInt;
  1644. hs : UnicodeString;
  1645. begin
  1646. result:='';
  1647. if s='' then
  1648. exit;
  1649. SetLength(hs,length(s));
  1650. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  1651. if i>0 then
  1652. begin
  1653. SetLength(hs,i-1);
  1654. result:=hs;
  1655. end;
  1656. end;
  1657. function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
  1658. begin
  1659. Result:=Utf8Encode(s);
  1660. end;
  1661. function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
  1662. begin
  1663. Result:=Utf8Decode(s);
  1664. end;
  1665. { converts an utf-16 code point or surrogate pair to utf-32 }
  1666. function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  1667. var
  1668. w: unicodechar;
  1669. begin
  1670. { UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
  1671. { are the same in UTF-32 }
  1672. w:=s[index];
  1673. if (w<=#$d7ff) or
  1674. (w>=#$e000) then
  1675. begin
  1676. result:=UCS4Char(w);
  1677. len:=1;
  1678. end
  1679. { valid surrogate pair? }
  1680. else if (w<=#$dbff) and
  1681. { w>=#$d7ff check not needed, checked above }
  1682. (index<length(s)) and
  1683. (s[index+1]>=#$dc00) and
  1684. (s[index+1]<=#$dfff) then
  1685. { convert the surrogate pair to UTF-32 }
  1686. begin
  1687. result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
  1688. len:=2;
  1689. end
  1690. else
  1691. { invalid surrogate -> do nothing }
  1692. begin
  1693. result:=UCS4Char(w);
  1694. len:=1;
  1695. end;
  1696. end;
  1697. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  1698. var
  1699. i, slen,
  1700. destindex : SizeInt;
  1701. len : longint;
  1702. begin
  1703. slen:=length(s);
  1704. setlength(result,slen+1);
  1705. i:=1;
  1706. destindex:=0;
  1707. while (i<=slen) do
  1708. begin
  1709. result[destindex]:=utf16toutf32(s,i,len);
  1710. inc(destindex);
  1711. inc(i,len);
  1712. end;
  1713. { destindex <= slen (surrogate pairs may have been merged) }
  1714. { destindex+1 for terminating #0 (dynamic arrays are }
  1715. { implicitely filled with zero) }
  1716. setlength(result,destindex+1);
  1717. end;
  1718. { concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. }
  1719. procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
  1720. var
  1721. p : PUnicodeChar;
  1722. begin
  1723. { if nc > $ffff, we need two places }
  1724. if (index+ord(nc > $ffff)>length(s)) then
  1725. if (length(s) < 10*256) then
  1726. setlength(s,length(s)+10)
  1727. else
  1728. setlength(s,length(s)+length(s) shr 8);
  1729. { we know that s is unique -> avoid uniquestring calls}
  1730. p:=@s[index];
  1731. if (nc<$ffff) then
  1732. begin
  1733. p^:=unicodechar(nc);
  1734. inc(index);
  1735. end
  1736. else if (dword(nc)<=$10ffff) then
  1737. begin
  1738. p^:=unicodechar((nc - $10000) shr 10 + $d800);
  1739. (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
  1740. inc(index,2);
  1741. end
  1742. else
  1743. { invalid code point }
  1744. begin
  1745. p^:='?';
  1746. inc(index);
  1747. end;
  1748. end;
  1749. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  1750. var
  1751. i : SizeInt;
  1752. resindex : SizeInt;
  1753. begin
  1754. { skip terminating #0 }
  1755. SetLength(result,length(s)-1);
  1756. resindex:=1;
  1757. for i:=0 to high(s)-1 do
  1758. ConcatUTF32ToUnicodeStr(s[i],result,resindex);
  1759. { adjust result length (may be too big due to growing }
  1760. { for surrogate pairs) }
  1761. setlength(result,resindex-1);
  1762. end;
  1763. const
  1764. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  1765. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  1766. procedure unimplementedunicodestring;
  1767. begin
  1768. {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
  1769. If IsConsole then
  1770. begin
  1771. Writeln(StdErr,SNoUnicodestrings);
  1772. Writeln(StdErr,SRecompileWithUnicodestrings);
  1773. end;
  1774. {$endif FPC_HAS_FEATURE_CONSOLEIO}
  1775. HandleErrorFrame(233,get_frame);
  1776. end;
  1777. {$warnings off}
  1778. function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
  1779. begin
  1780. unimplementedunicodestring;
  1781. end;
  1782. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  1783. begin
  1784. unimplementedunicodestring;
  1785. end;
  1786. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  1787. begin
  1788. unimplementedunicodestring;
  1789. end;
  1790. {$warnings on}
  1791. procedure initunicodestringmanager;
  1792. begin
  1793. {$ifndef HAS_WIDESTRINGMANAGER}
  1794. widestringmanager.Unicode2AnsiMoveProc:=@defaultUnicode2AnsiMove;
  1795. widestringmanager.Ansi2UnicodeMoveProc:=@defaultAnsi2UnicodeMove;
  1796. widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
  1797. widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
  1798. {$endif HAS_WIDESTRINGMANAGER}
  1799. widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
  1800. widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  1801. end;