ustrings.inc 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. Copyright (c) 2011 by Jonas Maebe,
  5. members of the Free Pascal development team.
  6. This file implements support routines for UTF-8 strings with FPC
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$i wustrings.inc}
  14. {
  15. This file contains the implementation of the UnicodeString type,
  16. which on the Java platforms is an alias for java.lang.String
  17. }
  18. Function NewUnicodeString(Len : SizeInt) : JLString;
  19. {
  20. Allocate a new UnicodeString on the heap.
  21. initialize it to zero length and reference count 1.
  22. }
  23. var
  24. data: array of jchar;
  25. begin
  26. setlength(data,len);
  27. result:=JLString.create(data);
  28. end;
  29. (*
  30. procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
  31. {
  32. Converts a UnicodeString to a ShortString;
  33. }
  34. Var
  35. Size : SizeInt;
  36. temp : ansistring;
  37. begin
  38. res:='';
  39. Size:=Length(S2);
  40. if Size>0 then
  41. begin
  42. If Size>high(res) then
  43. Size:=high(res);
  44. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,Size);
  45. res:=temp;
  46. end;
  47. end;
  48. Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
  49. {
  50. Converts a ShortString to a UnicodeString;
  51. }
  52. Var
  53. Size : SizeInt;
  54. begin
  55. result:='';
  56. Size:=Length(S2);
  57. if Size>0 then
  58. begin
  59. widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),result,Size);
  60. { Terminating Zero }
  61. PUnicodeChar(Pointer(fpc_ShortStr_To_UnicodeStr)+Size*sizeof(UnicodeChar))^:=#0;
  62. end;
  63. end;
  64. *)
  65. Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString): AnsiString; compilerproc;
  66. {
  67. Converts a UnicodeString to an AnsiString
  68. }
  69. {$ifdef nounsupported}
  70. Var
  71. Size : SizeInt;
  72. {$endif}
  73. begin
  74. {$ifdef nounsupported}
  75. result:='';
  76. Size:=Length(S2);
  77. if Size>0 then
  78. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,Size);
  79. {$endif}
  80. end;
  81. Function fpc_AnsiStr_To_UnicodeStr (Const S2 : AnsiString): UnicodeString; compilerproc;
  82. {
  83. Converts an AnsiString to a UnicodeString;
  84. }
  85. {$ifdef nounsupported}
  86. Var
  87. Size : SizeInt;
  88. {$endif}
  89. begin
  90. {$ifdef nounsupported}
  91. result:='';
  92. Size:=Length(S2);
  93. if Size>0 then
  94. widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),result,Size);
  95. {$endif}
  96. end;
  97. Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  98. begin
  99. result:=s2;
  100. end;
  101. Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  102. begin
  103. result:=s2;
  104. end;
  105. function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
  106. Var
  107. sb: JLStringBuilder;
  108. begin
  109. { only assign if s1 or s2 is empty }
  110. if (length(S1)=0) then
  111. begin
  112. result:=s2;
  113. exit;
  114. end;
  115. if (length(S2)=0) then
  116. begin
  117. result:=s1;
  118. exit;
  119. end;
  120. sb:=JLStringBuilder.create(S1);
  121. sb.append(s2);
  122. result:=sb.toString;
  123. end;
  124. function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
  125. Var
  126. i : Longint;
  127. Size,NewSize : SizeInt;
  128. sb: JLStringBuilder;
  129. begin
  130. { First calculate size of the result so we can allocate a StringBuilder of
  131. the right size }
  132. NewSize:=0;
  133. for i:=low(sarr) to high(sarr) do
  134. inc(Newsize,length(sarr[i]));
  135. sb:=JLStringBuilder.create(NewSize);
  136. for i:=low(sarr) to high(sarr) do
  137. begin
  138. if length(sarr[i])>0 then
  139. sb.append(sarr[i]);
  140. end;
  141. result:=sb.toString;
  142. end;
  143. Function fpc_Char_To_UChar(const c : AnsiChar): UnicodeChar; compilerproc;
  144. var
  145. str: JLString;
  146. arr: array of jbyte;
  147. begin
  148. setlength(arr,1);
  149. arr[0]:=ord(c);
  150. result:=JLString.create(arr,0,1).charAt(0);
  151. end;
  152. Function fpc_Char_To_UnicodeStr(const c : AnsiChar): UnicodeString; compilerproc;
  153. {
  154. Converts a AnsiChar to a UnicodeString;
  155. }
  156. var
  157. str: JLString;
  158. arr: array of jbyte;
  159. begin
  160. setlength(arr,1);
  161. arr[0]:=ord(c);
  162. result:=JLString.create(arr,0,1);
  163. end;
  164. Function fpc_UChar_To_Char(const c : UnicodeChar): AnsiChar; compilerproc;
  165. {
  166. Converts a UnicodeChar to a AnsiChar;
  167. }
  168. var
  169. arrb: array of jbyte;
  170. arrw: array of jchar;
  171. str: JLString;
  172. begin
  173. setlength(arrw,1);
  174. arrw[0]:=c;
  175. str:=JLString.create(arrw);
  176. arrb:=str.getbytes();
  177. result:=chr(arrb[0]);
  178. end;
  179. Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
  180. {
  181. Converts a WideChar to a UnicodeString;
  182. }
  183. var
  184. arrw: array of jchar;
  185. begin
  186. setlength(arrw,1);
  187. arrw[0]:=c;
  188. result:=JLString.create(arrw);
  189. end;
  190. Function fpc_Char_To_WChar(const c : AnsiChar): WideChar; compilerproc;
  191. {
  192. Converts a AnsiChar to a WideChar;
  193. }
  194. var
  195. str: JLString;
  196. arr: array of jbyte;
  197. begin
  198. setlength(arr,1);
  199. arr[0]:=ord(c);
  200. result:=JLString.create(arr,0,1).charAt(0);
  201. end;
  202. Function fpc_WChar_To_Char(const c : WideChar): AnsiChar; compilerproc;
  203. {
  204. Converts a WideChar to a AnsiChar;
  205. }
  206. var
  207. arrb: array of jbyte;
  208. arrw: array of jchar;
  209. begin
  210. setlength(arrw,1);
  211. arrw[0]:=c;
  212. arrb:=JLString.create(arrw).getbytes();
  213. result:=chr(arrb[0]);
  214. end;
  215. (*
  216. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  217. Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
  218. {
  219. Converts a WideChar to a ShortString;
  220. }
  221. var
  222. s: ansistring;
  223. begin
  224. widestringmanager.Wide2AnsiMoveProc(@c, s, 1);
  225. fpc_WChar_To_ShortStr:= s;
  226. end;
  227. {$else FPC_STRTOSHORTSTRINGPROC}
  228. procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
  229. {
  230. Converts a WideChar to a ShortString;
  231. }
  232. var
  233. s: ansistring;
  234. begin
  235. widestringmanager.Wide2AnsiMoveProc(@c,s,1);
  236. res:=s;
  237. end;
  238. {$endif FPC_STRTOSHORTSTRINGPROC}
  239. *)
  240. Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
  241. {
  242. Converts a UnicodeChar to a UnicodeString;
  243. }
  244. var
  245. arr: array of UnicodeChar;
  246. begin
  247. setlength(arr,1);
  248. arr[0]:=c;
  249. result:=JLString.create(arr);
  250. end;
  251. (*
  252. Function fpc_UChar_To_AnsiStr(const c : UnicodeChar): AnsiString; compilerproc;
  253. {
  254. Converts a UnicodeChar to a AnsiString;
  255. }
  256. begin
  257. widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, 1);
  258. end;
  259. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  260. Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
  261. {
  262. Converts a UnicodeChar to a ShortString;
  263. }
  264. var
  265. s: ansistring;
  266. begin
  267. widestringmanager.Unicode2AnsiMoveProc(@c, s, 1);
  268. fpc_UChar_To_ShortStr:= s;
  269. end;
  270. {$else FPC_STRTOSHORTSTRINGPROC}
  271. procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
  272. {
  273. Converts a UnicodeChar to a ShortString;
  274. }
  275. var
  276. s: ansistring;
  277. begin
  278. widestringmanager.Unicode2AnsiMoveProc(@c,s,1);
  279. res:=s;
  280. end;
  281. {$endif FPC_STRTOSHORTSTRINGPROC}
  282. Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
  283. Var
  284. L : SizeInt;
  285. begin
  286. if (not assigned(p)) or (p[0]=#0) Then
  287. begin
  288. fpc_pchar_to_unicodestr := '';
  289. exit;
  290. end;
  291. l:=IndexChar(p^,-1,#0);
  292. widestringmanager.Ansi2UnicodeMoveProc(P,fpc_PChar_To_UnicodeStr,l);
  293. end;
  294. *)
  295. Function fpc_CharArray_To_UnicodeStr(const arr: array of ansichar; zerobased: boolean = true): UnicodeString; compilerproc;
  296. var
  297. i,j : SizeInt;
  298. localarr: array of jbyte;
  299. foundnull: boolean;
  300. begin
  301. if (zerobased) then
  302. begin
  303. if (arr[0]=#0) Then
  304. begin
  305. fpc_chararray_to_unicodestr := '';
  306. exit;
  307. end;
  308. foundnull:=false;
  309. for i:=low(arr) to high(arr) do
  310. if arr[i]=#0 then
  311. begin
  312. foundnull:=true;
  313. break;
  314. end;
  315. if not foundnull then
  316. i := high(arr)+1;
  317. end
  318. else
  319. i := high(arr)+1;
  320. setlength(localarr,i);
  321. for j:=0 to i-1 do
  322. localarr[j]:=ord(arr[j]);
  323. result:=JLString.create(localarr,0,i);
  324. end;
  325. (*
  326. function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
  327. var
  328. l: longint;
  329. index: longint;
  330. len: byte;
  331. temp: ansistring;
  332. foundnull: boolean;
  333. begin
  334. l := high(arr)+1;
  335. if l>=256 then
  336. l:=255
  337. else if l<0 then
  338. l:=0;
  339. if zerobased then
  340. begin
  341. foundnull:=false;
  342. for index:=low(arr) to l-1 do
  343. if arr[index]=#0 then
  344. begin
  345. foundnull:=true;
  346. break;
  347. end;
  348. if not foundnull then
  349. len := l
  350. else
  351. len := index;
  352. end
  353. else
  354. len := l;
  355. result:=JLString.create(arr,0,l);
  356. end;
  357. Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; zerobased: boolean = true): AnsiString; compilerproc;
  358. var
  359. i : SizeInt;
  360. begin
  361. if (zerobased) then
  362. begin
  363. i:=IndexWord(arr,high(arr)+1,0);
  364. if i = -1 then
  365. i := high(arr)+1;
  366. end
  367. else
  368. i := high(arr)+1;
  369. SetLength(fpc_UnicodeCharArray_To_AnsiStr,i);
  370. widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,i);
  371. end;
  372. *)
  373. Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
  374. var
  375. i : SizeInt;
  376. foundnull : boolean;
  377. begin
  378. if (zerobased) then
  379. begin
  380. foundnull:=false;
  381. for i:=low(arr) to high(arr) do
  382. if arr[i]=#0 then
  383. begin
  384. foundnull:=true;
  385. break;
  386. end;
  387. if not foundnull then
  388. i := high(arr)+1;
  389. end
  390. else
  391. i := high(arr)+1;
  392. result:=JLString.create(arr,0,i);
  393. end;
  394. Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  395. var
  396. i : SizeInt;
  397. foundnull : boolean;
  398. begin
  399. if (zerobased) then
  400. begin
  401. foundnull:=false;
  402. for i:=low(arr) to high(arr) do
  403. if arr[i]=#0 then
  404. begin
  405. foundnull:=true;
  406. break;
  407. end;
  408. if not foundnull then
  409. i := high(arr)+1;
  410. end
  411. else
  412. i := high(arr)+1;
  413. result:=JLString.create(arr,0,i);
  414. end;
  415. (*
  416. { due to their names, the following procedures should be in wstrings.inc,
  417. however, the compiler generates code using this functions on all platforms }
  418. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  419. function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  420. var
  421. l: longint;
  422. index: longint;
  423. len: byte;
  424. temp: ansistring;
  425. begin
  426. l := high(arr)+1;
  427. if l>=256 then
  428. l:=255
  429. else if l<0 then
  430. l:=0;
  431. if zerobased then
  432. begin
  433. index:=IndexWord(arr[0],l,0);
  434. if (index < 0) then
  435. len := l
  436. else
  437. len := index;
  438. end
  439. else
  440. len := l;
  441. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
  442. fpc_WideCharArray_To_ShortStr := temp;
  443. end;
  444. {$else FPC_STRTOSHORTSTRINGPROC}
  445. procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  446. var
  447. l: longint;
  448. index: ptrint;
  449. len: byte;
  450. temp: ansistring;
  451. begin
  452. l := high(arr)+1;
  453. if l>=high(res)+1 then
  454. l:=high(res)
  455. else if l<0 then
  456. l:=0;
  457. if zerobased then
  458. begin
  459. index:=IndexWord(arr[0],l,0);
  460. if index<0 then
  461. len:=l
  462. else
  463. len:=index;
  464. end
  465. else
  466. len:=l;
  467. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
  468. res:=temp;
  469. end;
  470. {$endif FPC_STRTOSHORTSTRINGPROC}
  471. *)
  472. Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; zerobased: boolean = true): AnsiString; compilerproc;
  473. {$ifdef nounsupported}
  474. var
  475. i : SizeInt;
  476. {$endif}
  477. begin
  478. {$ifdef nounsupported}
  479. if (zerobased) then
  480. begin
  481. i:=IndexWord(arr,high(arr)+1,0);
  482. if i = -1 then
  483. i := high(arr)+1;
  484. end
  485. else
  486. i := high(arr)+1;
  487. SetLength(fpc_WideCharArray_To_AnsiStr,i);
  488. widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,i);
  489. {$endif}
  490. end;
  491. Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  492. var
  493. i : SizeInt;
  494. foundnull : boolean;
  495. begin
  496. if (zerobased) then
  497. begin
  498. foundnull:=false;
  499. for i:=low(arr) to high(arr) do
  500. if arr[i]=#0 then
  501. begin
  502. foundnull:=true;
  503. break;
  504. end;
  505. if not foundnull then
  506. i := high(arr)+1;
  507. end
  508. else
  509. i := high(arr)+1;
  510. result:=JLString.create(arr,0,i);
  511. end;
  512. procedure fpc_unicodestr_to_chararray(out res: array of AnsiChar; const src: UnicodeString); compilerproc;
  513. var
  514. i, len: SizeInt;
  515. temp: array of jbyte;
  516. begin
  517. len := length(src);
  518. { make sure we don't dereference src if it can be nil (JM) }
  519. if len > 0 then
  520. begin
  521. temp:=JLString(src).getBytes;
  522. if len > length(temp) then
  523. len := length(temp);
  524. for i := 0 to len-1 do
  525. res[i] := chr(temp[i]);
  526. end;
  527. end;
  528. procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
  529. var
  530. len: SizeInt;
  531. begin
  532. len := length(src);
  533. { make sure we don't dereference src if it can be nil (JM) }
  534. if len > 0 then
  535. begin
  536. if len > high(res)+1 then
  537. len := high(res)+1;
  538. JLString(src).getChars(0,len,res,0);
  539. end;
  540. end;
  541. function fpc_unicodestr_setchar(const s: UnicodeString; const index: longint; const ch: unicodechar): UnicodeString; compilerproc;
  542. var
  543. sb: JLStringBuilder;
  544. begin
  545. sb:=JLStringBuilder.create(s);
  546. { string indexes are 1-based in Pascal, 0-based in Java }
  547. sb.setCharAt(index-1,ch);
  548. result:=sb.toString();
  549. end;
  550. (*
  551. procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
  552. var
  553. len: SizeInt;
  554. temp: unicodestring;
  555. begin
  556. len := length(src);
  557. { make sure we don't dereference src if it can be nil (JM) }
  558. if len > 0 then
  559. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  560. len := length(temp);
  561. if len > length(res) then
  562. len := length(res);
  563. {$r-}
  564. move(temp[1],res[0],len*sizeof(unicodechar));
  565. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  566. {$ifdef RangeCheckWasOn}
  567. {$r+}
  568. {$endif}
  569. end;
  570. procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
  571. var
  572. len: longint;
  573. temp : unicodestring;
  574. begin
  575. len := length(src);
  576. { make sure we don't access char 1 if length is 0 (JM) }
  577. if len > 0 then
  578. widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
  579. len := length(temp);
  580. if len > length(res) then
  581. len := length(res);
  582. {$r-}
  583. move(temp[1],res[0],len*sizeof(unicodechar));
  584. fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
  585. {$ifdef RangeCheckWasOn}
  586. {$r+}
  587. {$endif}
  588. end;
  589. *)
  590. procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: AnsiString); compilerproc;
  591. {$ifdef nounsupported}
  592. var
  593. len: SizeInt;
  594. temp: widestring;
  595. {$endif}
  596. begin
  597. {$ifdef nounsupported}
  598. len := length(src);
  599. { make sure we don't dereference src if it can be nil (JM) }
  600. if len > 0 then
  601. widestringmanager.ansi2widemoveproc(pchar(@src[1]),temp,len);
  602. len := length(temp);
  603. if len > length(res) then
  604. len := length(res);
  605. {$r-}
  606. move(temp[1],res[0],len*sizeof(widechar));
  607. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  608. {$ifdef RangeCheckWasOn}
  609. {$r+}
  610. {$endif}
  611. {$endif}
  612. end;
  613. (*
  614. procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  615. var
  616. len: longint;
  617. temp : widestring;
  618. begin
  619. len := length(src);
  620. { make sure we don't access char 1 if length is 0 (JM) }
  621. if len > 0 then
  622. widestringmanager.ansi2widemoveproc(pchar(@src[1]),temp,len);
  623. len := length(temp);
  624. if len > length(res) then
  625. len := length(res);
  626. {$r-}
  627. move(temp[1],res[0],len*sizeof(widechar));
  628. fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
  629. {$ifdef RangeCheckWasOn}
  630. {$r+}
  631. {$endif}
  632. end;
  633. *)
  634. procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  635. var
  636. i, len: SizeInt;
  637. begin
  638. len := length(src);
  639. if len > length(res) then
  640. len := length(res);
  641. for i:=0 to len-1 do
  642. res[i]:=src[i+1];
  643. end;
  644. Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt; compilerproc;
  645. {
  646. Compares 2 UnicodeStrings;
  647. The result is
  648. <0 if S1<S2
  649. 0 if S1=S2
  650. >0 if S1>S2
  651. }
  652. Var
  653. MaxI,Temp : SizeInt;
  654. begin
  655. if JLObject(S1)=JLObject(S2) then
  656. begin
  657. result:=0;
  658. exit;
  659. end;
  660. result:=JLString(S1).compareTo(S2);
  661. end;
  662. Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt; compilerproc;
  663. {
  664. Compares 2 UnicodeStrings for equality only;
  665. The result is
  666. 0 if S1=S2
  667. <>0 if S1<>S2
  668. }
  669. Var
  670. MaxI : SizeInt;
  671. begin
  672. result:=ord(not JLString(S1).equals(JLString(S2)));
  673. end;
  674. function fpc_UnicodeStr_SetLength(const S : UnicodeString; l : SizeInt): UnicodeString; compilerproc;
  675. {
  676. Sets The length of string S to L.
  677. Makes sure S is unique, and contains enough room.
  678. Returns new val
  679. }
  680. Var
  681. movelen: SizeInt;
  682. chars: array of widechar;
  683. strlen: SizeInt;
  684. begin
  685. if (l>0) then
  686. begin
  687. if JLObject(S)=nil then
  688. begin
  689. { Need a completely new string...}
  690. result:=NewUnicodeString(l);
  691. end
  692. { no need to create a new string, since Java strings are immutable }
  693. else
  694. begin
  695. strlen:=length(s);
  696. if l=strlen then
  697. result:=s
  698. else if (l<strlen) then
  699. result:=JLString(s).substring(0,l)
  700. else
  701. begin
  702. setlength(chars,l);
  703. JLString(s).getChars(0,strlen,chars,0);
  704. result:=JLString.create(chars,0,l)
  705. end;
  706. end
  707. end
  708. else
  709. begin
  710. result:='';
  711. end;
  712. end;
  713. {*****************************************************************************
  714. Public functions, In interface.
  715. *****************************************************************************}
  716. (*
  717. function UnicodeCharToString(S : PUnicodeChar) : AnsiString;
  718. begin
  719. result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
  720. end;
  721. function StringToUnicodeChar(const Src : AnsiString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  722. var
  723. temp:unicodestring;
  724. begin
  725. widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),temp,Length(Src));
  726. if Length(temp)<DestSize then
  727. move(temp[1],Dest^,Length(temp)*SizeOf(UnicodeChar))
  728. else
  729. move(temp[1],Dest^,(DestSize-1)*SizeOf(UnicodeChar));
  730. Dest[DestSize-1]:=#0;
  731. result:=Dest;
  732. end;
  733. function WideCharToString(S : PWideChar) : AnsiString;
  734. begin
  735. result:=WideCharLenToString(s,Length(WideString(s)));
  736. end;
  737. function StringToWideChar(const Src : AnsiString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  738. var
  739. temp:widestring;
  740. begin
  741. widestringmanager.Ansi2WideMoveProc(PChar(Src),temp,Length(Src));
  742. if Length(temp)<DestSize then
  743. move(temp[1],Dest^,Length(temp)*SizeOf(WideChar))
  744. else
  745. move(temp[1],Dest^,(DestSize-1)*SizeOf(WideChar));
  746. Dest[DestSize-1]:=#0;
  747. result:=Dest;
  748. end;
  749. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : AnsiString;
  750. begin
  751. //SetLength(result,Len);
  752. widestringmanager.Unicode2AnsiMoveproc(S,result,Len);
  753. end;
  754. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  755. begin
  756. Dest:=UnicodeCharLenToString(Src,Len);
  757. end;
  758. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  759. begin
  760. Dest:=UnicodeCharToString(S);
  761. end;
  762. function WideCharLenToString(S : PWideChar;Len : SizeInt) : AnsiString;
  763. begin
  764. //SetLength(result,Len);
  765. widestringmanager.Wide2AnsiMoveproc(S,result,Len);
  766. end;
  767. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  768. begin
  769. Dest:=WideCharLenToString(Src,Len);
  770. end;
  771. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  772. begin
  773. Dest:=WideCharToString(S);
  774. end;
  775. *)
  776. Function fpc_unicodestr_Unique(const S : JLObject): JLObject; compilerproc;
  777. begin
  778. result:=s;
  779. end;
  780. Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  781. begin
  782. dec(index);
  783. if Index < 0 then
  784. Index := 0;
  785. { Check Size. Accounts for Zero-length S, the double check is needed because
  786. Size can be maxint and will get <0 when adding index }
  787. if (Size>Length(S)) or
  788. (Index+Size>Length(S)) then
  789. Size:=Length(S)-Index;
  790. If Size>0 then
  791. result:=JLString(s).subString(Index,Size)
  792. else
  793. result:='';
  794. end;
  795. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
  796. begin
  797. Pos:=0;
  798. if Length(SubStr)>0 then
  799. Pos:=JLString(Source).indexOf(SubStr)+1;
  800. end;
  801. { Faster version for a unicodechar alone }
  802. Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
  803. begin
  804. Pos:=0;
  805. if length(S)>0 then
  806. Pos:=JLString(s).indexOf(ord(c))+1;
  807. end;
  808. (*
  809. Function Pos (c : AnsiString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  810. begin
  811. result:=Pos(UnicodeString(c),s);
  812. end;
  813. Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  814. begin
  815. result:=Pos(UnicodeString(c),s);
  816. end;
  817. Function Pos (c : UnicodeString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  818. begin
  819. result:=Pos(c,UnicodeString(s));
  820. end;
  821. *)
  822. { Faster version for a char alone. Must be implemented because }
  823. { pos(c: char; const s: shortstring) also exists, so otherwise }
  824. { using pos(char,pchar) will always call the shortstring version }
  825. { (exact match for first argument), also with $h+ (JM) }
  826. Function Pos (c : AnsiChar; Const s : UnicodeString) : SizeInt;
  827. var
  828. i: SizeInt;
  829. wc : unicodechar;
  830. begin
  831. wc:=c;
  832. result:=Pos(wc,s);
  833. end;
  834. (*
  835. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  836. Var
  837. LS : SizeInt;
  838. begin
  839. LS:=Length(S);
  840. if (Index>LS) or (Index<=0) or (Size<=0) then
  841. exit;
  842. UniqueString (S);
  843. { (Size+Index) will overflow if Size=MaxInt. }
  844. if Size>LS-Index then
  845. Size:=LS-Index+1;
  846. if Size<=LS-Index then
  847. begin
  848. Dec(Index);
  849. Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
  850. end;
  851. Setlength(s,LS-Size);
  852. end;
  853. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  854. var
  855. Temp : UnicodeString;
  856. LS : SizeInt;
  857. begin
  858. If Length(Source)=0 then
  859. exit;
  860. if index <= 0 then
  861. index := 1;
  862. Ls:=Length(S);
  863. if index > LS then
  864. index := LS+1;
  865. Dec(Index);
  866. Pointer(Temp) := NewUnicodeString(Length(Source)+LS);
  867. SetLength(Temp,Length(Source)+LS);
  868. If Index>0 then
  869. move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
  870. Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
  871. If (LS-Index)>0 then
  872. Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
  873. S:=Temp;
  874. end;
  875. *)
  876. Function UpCase(c:UnicodeChar):UnicodeChar;
  877. begin
  878. result:=JLCharacter.toUpperCase(c);
  879. end;
  880. function UpCase(const s : UnicodeString) : UnicodeString;
  881. begin
  882. result:=JLString(s).toUpperCase;
  883. end;
  884. (*
  885. Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
  886. begin
  887. SetLength(S,Len);
  888. If (Buf<>Nil) and (Len>0) then
  889. Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
  890. end;
  891. Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
  892. var
  893. BufLen: SizeInt;
  894. begin
  895. SetLength(S,Len);
  896. If (Buf<>Nil) and (Len>0) then
  897. widestringmanager.Ansi2UnicodeMoveProc(Buf,S,Len);
  898. end;
  899. {$ifndef FPUNONE}
  900. Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
  901. Var
  902. SS : String;
  903. begin
  904. fpc_Val_Real_UnicodeStr := 0;
  905. if length(S) > 255 then
  906. code := 256
  907. else
  908. begin
  909. SS := S;
  910. Val(SS,fpc_Val_Real_UnicodeStr,code);
  911. end;
  912. end;
  913. {$endif}
  914. function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
  915. var ss:shortstring;
  916. begin
  917. if length(s)>255 then
  918. code:=256
  919. else
  920. begin
  921. ss:=s;
  922. val(ss,fpc_val_enum_unicodestr,code);
  923. end;
  924. end;
  925. Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
  926. Var
  927. SS : String;
  928. begin
  929. if length(S) > 255 then
  930. begin
  931. fpc_Val_Currency_UnicodeStr:=0;
  932. code := 256;
  933. end
  934. else
  935. begin
  936. SS := S;
  937. Val(SS,fpc_Val_Currency_UnicodeStr,code);
  938. end;
  939. end;
  940. Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
  941. Var
  942. SS : ShortString;
  943. begin
  944. fpc_Val_UInt_UnicodeStr := 0;
  945. if length(S) > 255 then
  946. code := 256
  947. else
  948. begin
  949. SS := S;
  950. Val(SS,fpc_Val_UInt_UnicodeStr,code);
  951. end;
  952. end;
  953. Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
  954. Var
  955. SS : ShortString;
  956. begin
  957. fpc_Val_SInt_UnicodeStr:=0;
  958. if length(S)>255 then
  959. code:=256
  960. else
  961. begin
  962. SS := S;
  963. fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
  964. end;
  965. end;
  966. {$ifndef CPU64}
  967. Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
  968. Var
  969. SS : ShortString;
  970. begin
  971. fpc_Val_qword_UnicodeStr:=0;
  972. if length(S)>255 then
  973. code:=256
  974. else
  975. begin
  976. SS := S;
  977. Val(SS,fpc_Val_qword_UnicodeStr,Code);
  978. end;
  979. end;
  980. Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
  981. Var
  982. SS : ShortString;
  983. begin
  984. fpc_Val_int64_UnicodeStr:=0;
  985. if length(S)>255 then
  986. code:=256
  987. else
  988. begin
  989. SS := S;
  990. Val(SS,fpc_Val_int64_UnicodeStr,Code);
  991. end;
  992. end;
  993. {$endif CPU64}
  994. {$ifndef FPUNONE}
  995. procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
  996. var
  997. ss : shortstring;
  998. begin
  999. str_real(len,fr,d,treal_type(rt),ss);
  1000. s:=ss;
  1001. end;
  1002. {$endif}
  1003. procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
  1004. var ss:shortstring;
  1005. begin
  1006. fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
  1007. s:=ss;
  1008. end;
  1009. procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
  1010. var ss:shortstring;
  1011. begin
  1012. fpc_shortstr_bool(b,len,ss);
  1013. s:=ss;
  1014. end;
  1015. {$ifdef FPC_HAS_STR_CURRENCY}
  1016. procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
  1017. var
  1018. ss : shortstring;
  1019. begin
  1020. str(c:len:fr,ss);
  1021. s:=ss;
  1022. end;
  1023. {$endif FPC_HAS_STR_CURRENCY}
  1024. Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
  1025. Var
  1026. SS : ShortString;
  1027. begin
  1028. Str (v:Len,SS);
  1029. S:=SS;
  1030. end;
  1031. Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
  1032. Var
  1033. SS : ShortString;
  1034. begin
  1035. str(v:Len,SS);
  1036. S:=SS;
  1037. end;
  1038. {$ifndef CPU64}
  1039. Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
  1040. Var
  1041. SS : ShortString;
  1042. begin
  1043. Str (v:Len,SS);
  1044. S:=SS;
  1045. end;
  1046. Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
  1047. Var
  1048. SS : ShortString;
  1049. begin
  1050. str(v:Len,SS);
  1051. S:=SS;
  1052. end;
  1053. {$endif CPU64}
  1054. *)
  1055. (*
  1056. { converts an utf-16 code point or surrogate pair to utf-32 }
  1057. function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
  1058. var
  1059. w: unicodechar;
  1060. begin
  1061. { UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
  1062. { are the same in UTF-32 }
  1063. w:=s[index];
  1064. if (w<=#$d7ff) or
  1065. (w>=#$e000) then
  1066. begin
  1067. result:=UCS4Char(w);
  1068. len:=1;
  1069. end
  1070. { valid surrogate pair? }
  1071. else if (w<=#$dbff) and
  1072. { w>=#$d7ff check not needed, checked above }
  1073. (index<length(s)) and
  1074. (s[index+1]>=#$dc00) and
  1075. (s[index+1]<=#$dfff) then
  1076. { convert the surrogate pair to UTF-32 }
  1077. begin
  1078. result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
  1079. len:=2;
  1080. end
  1081. else
  1082. { invalid surrogate -> do nothing }
  1083. begin
  1084. result:=UCS4Char(w);
  1085. len:=1;
  1086. end;
  1087. end;
  1088. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1089. begin
  1090. if assigned(Source) then
  1091. Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
  1092. else
  1093. Result:=0;
  1094. end;
  1095. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  1096. var
  1097. i,j : SizeUInt;
  1098. w : word;
  1099. lw : longword;
  1100. len : longint;
  1101. begin
  1102. result:=0;
  1103. if source=nil then
  1104. exit;
  1105. i:=0;
  1106. j:=0;
  1107. if assigned(Dest) then
  1108. begin
  1109. while (i<SourceChars) and (j<MaxDestBytes) do
  1110. begin
  1111. w:=word(Source[i]);
  1112. case w of
  1113. 0..$7f:
  1114. begin
  1115. Dest[j]:=char(w);
  1116. inc(j);
  1117. end;
  1118. $80..$7ff:
  1119. begin
  1120. if j+1>=MaxDestBytes then
  1121. break;
  1122. Dest[j]:=char($c0 or (w shr 6));
  1123. Dest[j+1]:=char($80 or (w and $3f));
  1124. inc(j,2);
  1125. end;
  1126. $800..$d7ff,$e000..$ffff:
  1127. begin
  1128. if j+2>=MaxDestBytes then
  1129. break;
  1130. Dest[j]:=char($e0 or (w shr 12));
  1131. Dest[j+1]:=char($80 or ((w shr 6) and $3f));
  1132. Dest[j+2]:=char($80 or (w and $3f));
  1133. inc(j,3);
  1134. end;
  1135. $d800..$dbff:
  1136. {High Surrogates}
  1137. begin
  1138. if j+3>=MaxDestBytes then
  1139. break;
  1140. if (i<sourcechars-1) and
  1141. (word(Source[i+1]) >= $dc00) and
  1142. (word(Source[i+1]) <= $dfff) then
  1143. begin
  1144. lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
  1145. Dest[j]:=char($f0 or (lw shr 18));
  1146. Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
  1147. Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
  1148. Dest[j+3]:=char($80 or (lw and $3f));
  1149. inc(j,4);
  1150. inc(i);
  1151. end;
  1152. end;
  1153. end;
  1154. inc(i);
  1155. end;
  1156. if j>SizeUInt(MaxDestBytes-1) then
  1157. j:=MaxDestBytes-1;
  1158. Dest[j]:=#0;
  1159. end
  1160. else
  1161. begin
  1162. while i<SourceChars do
  1163. begin
  1164. case word(Source[i]) of
  1165. $0..$7f:
  1166. inc(j);
  1167. $80..$7ff:
  1168. inc(j,2);
  1169. $800..$d7ff,$e000..$ffff:
  1170. inc(j,3);
  1171. $d800..$dbff:
  1172. begin
  1173. if (i<sourcechars-1) and
  1174. (word(Source[i+1]) >= $dc00) and
  1175. (word(Source[i+1]) <= $dfff) then
  1176. begin
  1177. inc(j,4);
  1178. inc(i);
  1179. end;
  1180. end;
  1181. end;
  1182. inc(i);
  1183. end;
  1184. end;
  1185. result:=j+1;
  1186. end;
  1187. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1188. begin
  1189. if assigned(Source) then
  1190. Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
  1191. else
  1192. Result:=0;
  1193. end;
  1194. function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  1195. const
  1196. UNICODE_INVALID=63;
  1197. var
  1198. InputUTF8: SizeUInt;
  1199. IBYTE: BYTE;
  1200. OutputUnicode: SizeUInt;
  1201. PRECHAR: SizeUInt;
  1202. TempBYTE: BYTE;
  1203. CharLen: SizeUint;
  1204. LookAhead: SizeUInt;
  1205. UC: SizeUInt;
  1206. begin
  1207. if not assigned(Source) then
  1208. begin
  1209. result:=0;
  1210. exit;
  1211. end;
  1212. result:=SizeUInt(-1);
  1213. InputUTF8:=0;
  1214. OutputUnicode:=0;
  1215. PreChar:=0;
  1216. if Assigned(Dest) Then
  1217. begin
  1218. while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
  1219. begin
  1220. IBYTE:=byte(Source[InputUTF8]);
  1221. if (IBYTE and $80) = 0 then
  1222. begin
  1223. //One character US-ASCII, convert it to unicode
  1224. if IBYTE = 10 then
  1225. begin
  1226. If (PreChar<>13) and FALSE then
  1227. begin
  1228. //Expand to crlf, conform UTF-8.
  1229. //This procedure will break the memory alocation by
  1230. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1231. if OutputUnicode+1<MaxDestChars then
  1232. begin
  1233. Dest[OutputUnicode]:=WideChar(13);
  1234. inc(OutputUnicode);
  1235. Dest[OutputUnicode]:=WideChar(10);
  1236. inc(OutputUnicode);
  1237. PreChar:=10;
  1238. end
  1239. else
  1240. begin
  1241. Dest[OutputUnicode]:=WideChar(13);
  1242. inc(OutputUnicode);
  1243. end;
  1244. end
  1245. else
  1246. begin
  1247. Dest[OutputUnicode]:=WideChar(IBYTE);
  1248. inc(OutputUnicode);
  1249. PreChar:=IBYTE;
  1250. end;
  1251. end
  1252. else
  1253. begin
  1254. Dest[OutputUnicode]:=WideChar(IBYTE);
  1255. inc(OutputUnicode);
  1256. PreChar:=IBYTE;
  1257. end;
  1258. inc(InputUTF8);
  1259. end
  1260. else
  1261. begin
  1262. TempByte:=IBYTE;
  1263. CharLen:=0;
  1264. while (TempBYTE and $80)<>0 do
  1265. begin
  1266. TempBYTE:=(TempBYTE shl 1) and $FE;
  1267. inc(CharLen);
  1268. end;
  1269. //Test for the "CharLen" conforms UTF-8 string
  1270. //This means the 10xxxxxx pattern.
  1271. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1272. begin
  1273. //Insuficient chars in string to decode
  1274. //UTF-8 array. Fallback to single char.
  1275. CharLen:= 1;
  1276. end;
  1277. for LookAhead := 1 to CharLen-1 do
  1278. begin
  1279. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1280. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1281. begin
  1282. //Invalid UTF-8 sequence, fallback.
  1283. CharLen:= LookAhead;
  1284. break;
  1285. end;
  1286. end;
  1287. UC:=$FFFF;
  1288. case CharLen of
  1289. 1: begin
  1290. //Not valid UTF-8 sequence
  1291. UC:=UNICODE_INVALID;
  1292. end;
  1293. 2: begin
  1294. //Two bytes UTF, convert it
  1295. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1296. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1297. if UC <= $7F then
  1298. begin
  1299. //Invalid UTF sequence.
  1300. UC:=UNICODE_INVALID;
  1301. end;
  1302. end;
  1303. 3: begin
  1304. //Three bytes, convert it to unicode
  1305. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1306. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1307. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1308. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1309. begin
  1310. //Invalid UTF-8 sequence
  1311. UC:= UNICODE_INVALID;
  1312. End;
  1313. end;
  1314. 4: begin
  1315. //Four bytes, convert it to two unicode characters
  1316. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1317. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1318. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1319. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1320. if (UC < $10000) or (UC > $10FFFF) then
  1321. begin
  1322. UC:= UNICODE_INVALID;
  1323. end
  1324. else
  1325. begin
  1326. { only store pair if room }
  1327. dec(UC,$10000);
  1328. if (OutputUnicode<MaxDestChars-1) then
  1329. begin
  1330. Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
  1331. inc(OutputUnicode);
  1332. UC:=(UC and $3ff) + $DC00;
  1333. end
  1334. else
  1335. begin
  1336. InputUTF8:= InputUTF8 + CharLen;
  1337. { don't store anything }
  1338. CharLen:=0;
  1339. end;
  1340. end;
  1341. end;
  1342. 5,6,7: begin
  1343. //Invalid UTF8 to unicode conversion,
  1344. //mask it as invalid UNICODE too.
  1345. UC:=UNICODE_INVALID;
  1346. end;
  1347. end;
  1348. if CharLen > 0 then
  1349. begin
  1350. PreChar:=UC;
  1351. Dest[OutputUnicode]:=WideChar(UC);
  1352. inc(OutputUnicode);
  1353. end;
  1354. InputUTF8:= InputUTF8 + CharLen;
  1355. end;
  1356. end;
  1357. Result:=OutputUnicode+1;
  1358. end
  1359. else
  1360. begin
  1361. while (InputUTF8<SourceBytes) do
  1362. begin
  1363. IBYTE:=byte(Source[InputUTF8]);
  1364. if (IBYTE and $80) = 0 then
  1365. begin
  1366. //One character US-ASCII, convert it to unicode
  1367. if IBYTE = 10 then
  1368. begin
  1369. if (PreChar<>13) and FALSE then
  1370. begin
  1371. //Expand to crlf, conform UTF-8.
  1372. //This procedure will break the memory alocation by
  1373. //FPC for the widestring, so never use it. Condition never true due the "and FALSE".
  1374. inc(OutputUnicode,2);
  1375. PreChar:=10;
  1376. end
  1377. else
  1378. begin
  1379. inc(OutputUnicode);
  1380. PreChar:=IBYTE;
  1381. end;
  1382. end
  1383. else
  1384. begin
  1385. inc(OutputUnicode);
  1386. PreChar:=IBYTE;
  1387. end;
  1388. inc(InputUTF8);
  1389. end
  1390. else
  1391. begin
  1392. TempByte:=IBYTE;
  1393. CharLen:=0;
  1394. while (TempBYTE and $80)<>0 do
  1395. begin
  1396. TempBYTE:=(TempBYTE shl 1) and $FE;
  1397. inc(CharLen);
  1398. end;
  1399. //Test for the "CharLen" conforms UTF-8 string
  1400. //This means the 10xxxxxx pattern.
  1401. if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
  1402. begin
  1403. //Insuficient chars in string to decode
  1404. //UTF-8 array. Fallback to single char.
  1405. CharLen:= 1;
  1406. end;
  1407. for LookAhead := 1 to CharLen-1 do
  1408. begin
  1409. if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
  1410. ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
  1411. begin
  1412. //Invalid UTF-8 sequence, fallback.
  1413. CharLen:= LookAhead;
  1414. break;
  1415. end;
  1416. end;
  1417. UC:=$FFFF;
  1418. case CharLen of
  1419. 1: begin
  1420. //Not valid UTF-8 sequence
  1421. UC:=UNICODE_INVALID;
  1422. end;
  1423. 2: begin
  1424. //Two bytes UTF, convert it
  1425. UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
  1426. UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
  1427. if UC <= $7F then
  1428. begin
  1429. //Invalid UTF sequence.
  1430. UC:=UNICODE_INVALID;
  1431. end;
  1432. end;
  1433. 3: begin
  1434. //Three bytes, convert it to unicode
  1435. UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
  1436. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
  1437. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
  1438. If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  1439. begin
  1440. //Invalid UTF-8 sequence
  1441. UC:= UNICODE_INVALID;
  1442. end;
  1443. end;
  1444. 4: begin
  1445. //Four bytes, convert it to two unicode characters
  1446. UC:= (byte(Source[InputUTF8]) and $07) shl 18;
  1447. UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
  1448. UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
  1449. UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
  1450. if (UC < $10000) or (UC > $10FFFF) then
  1451. UC:= UNICODE_INVALID
  1452. else
  1453. { extra character character }
  1454. inc(OutputUnicode);
  1455. end;
  1456. 5,6,7: begin
  1457. //Invalid UTF8 to unicode conversion,
  1458. //mask it as invalid UNICODE too.
  1459. UC:=UNICODE_INVALID;
  1460. end;
  1461. end;
  1462. if CharLen > 0 then
  1463. begin
  1464. PreChar:=UC;
  1465. inc(OutputUnicode);
  1466. end;
  1467. InputUTF8:= InputUTF8 + CharLen;
  1468. end;
  1469. end;
  1470. Result:=OutputUnicode+1;
  1471. end;
  1472. end;
  1473. function UTF8Encode(const s : Ansistring) : UTF8String; inline;
  1474. begin
  1475. Result:=UTF8Encode(UnicodeString(s));
  1476. end;
  1477. function UTF8Encode(const s : UnicodeString) : UTF8String;
  1478. var
  1479. i : SizeInt;
  1480. hs : UTF8String;
  1481. begin
  1482. result:='';
  1483. if s='' then
  1484. exit;
  1485. SetLength(hs,length(s)*3);
  1486. i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
  1487. if i>0 then
  1488. begin
  1489. SetLength(hs,i-1);
  1490. result:=hs;
  1491. end;
  1492. end;
  1493. function UTF8Decode(const s : UTF8String): UnicodeString;
  1494. var
  1495. i : SizeInt;
  1496. hs : UnicodeString;
  1497. begin
  1498. result:='';
  1499. if s='' then
  1500. exit;
  1501. SetLength(hs,length(s));
  1502. i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
  1503. if i>0 then
  1504. begin
  1505. SetLength(hs,i-1);
  1506. result:=hs;
  1507. end;
  1508. end;
  1509. function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
  1510. begin
  1511. Result:=Utf8Encode(s);
  1512. end;
  1513. function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
  1514. begin
  1515. Result:=Utf8Decode(s);
  1516. end;
  1517. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  1518. var
  1519. i, slen,
  1520. destindex : SizeInt;
  1521. len : longint;
  1522. begin
  1523. slen:=length(s);
  1524. setlength(result,slen+1);
  1525. i:=1;
  1526. destindex:=0;
  1527. while (i<=slen) do
  1528. begin
  1529. result[destindex]:=utf16toutf32(s,i,len);
  1530. inc(destindex);
  1531. inc(i,len);
  1532. end;
  1533. { destindex <= slen (surrogate pairs may have been merged) }
  1534. { destindex+1 for terminating #0 (dynamic arrays are }
  1535. { implicitely filled with zero) }
  1536. setlength(result,destindex+1);
  1537. end;
  1538. { concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. }
  1539. procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
  1540. var
  1541. p : PUnicodeChar;
  1542. begin
  1543. { if nc > $ffff, we need two places }
  1544. if (index+ord(nc > $ffff)>length(s)) then
  1545. if (length(s) < 10*256) then
  1546. setlength(s,length(s)+10)
  1547. else
  1548. setlength(s,length(s)+length(s) shr 8);
  1549. { we know that s is unique -> avoid uniquestring calls}
  1550. p:=@s[index];
  1551. if (nc<$ffff) then
  1552. begin
  1553. p^:=unicodechar(nc);
  1554. inc(index);
  1555. end
  1556. else if (dword(nc)<=$10ffff) then
  1557. begin
  1558. p^:=unicodechar((nc - $10000) shr 10 + $d800);
  1559. (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
  1560. inc(index,2);
  1561. end
  1562. else
  1563. { invalid code point }
  1564. begin
  1565. p^:='?';
  1566. inc(index);
  1567. end;
  1568. end;
  1569. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  1570. var
  1571. i : SizeInt;
  1572. resindex : SizeInt;
  1573. begin
  1574. { skip terminating #0 }
  1575. SetLength(result,length(s)-1);
  1576. resindex:=1;
  1577. for i:=0 to high(s)-1 do
  1578. ConcatUTF32ToUnicodeStr(s[i],result,resindex);
  1579. { adjust result length (may be too big due to growing }
  1580. { for surrogate pairs) }
  1581. setlength(result,resindex-1);
  1582. end;
  1583. function WideStringToUCS4String(const s : WideString) : UCS4String;
  1584. var
  1585. i, slen,
  1586. destindex : SizeInt;
  1587. len : longint;
  1588. begin
  1589. slen:=length(s);
  1590. setlength(result,slen+1);
  1591. i:=1;
  1592. destindex:=0;
  1593. while (i<=slen) do
  1594. begin
  1595. result[destindex]:=utf16toutf32(s,i,len);
  1596. inc(destindex);
  1597. inc(i,len);
  1598. end;
  1599. { destindex <= slen (surrogate pairs may have been merged) }
  1600. { destindex+1 for terminating #0 (dynamic arrays are }
  1601. { implicitely filled with zero) }
  1602. setlength(result,destindex+1);
  1603. end;
  1604. { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
  1605. procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
  1606. var
  1607. p : PWideChar;
  1608. begin
  1609. { if nc > $ffff, we need two places }
  1610. if (index+ord(nc > $ffff)>length(s)) then
  1611. if (length(s) < 10*256) then
  1612. setlength(s,length(s)+10)
  1613. else
  1614. setlength(s,length(s)+length(s) shr 8);
  1615. { we know that s is unique -> avoid uniquestring calls}
  1616. p:=@s[index];
  1617. if (nc<$ffff) then
  1618. begin
  1619. p^:=widechar(nc);
  1620. inc(index);
  1621. end
  1622. else if (dword(nc)<=$10ffff) then
  1623. begin
  1624. p^:=widechar((nc - $10000) shr 10 + $d800);
  1625. (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
  1626. inc(index,2);
  1627. end
  1628. else
  1629. { invalid code point }
  1630. begin
  1631. p^:='?';
  1632. inc(index);
  1633. end;
  1634. end;
  1635. function UCS4StringToWideString(const s : UCS4String) : WideString;
  1636. var
  1637. i : SizeInt;
  1638. resindex : SizeInt;
  1639. begin
  1640. { skip terminating #0 }
  1641. SetLength(result,length(s)-1);
  1642. resindex:=1;
  1643. for i:=0 to high(s)-1 do
  1644. ConcatUTF32ToWideStr(s[i],result,resindex);
  1645. { adjust result length (may be too big due to growing }
  1646. { for surrogate pairs) }
  1647. setlength(result,resindex-1);
  1648. end;
  1649. const
  1650. SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
  1651. SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
  1652. *)
  1653. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  1654. begin
  1655. widestringmanager.collator.setStrength(JTCollator.IDENTICAL);
  1656. result:=widestringmanager.collator.compare(s1,s2);
  1657. end;
  1658. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  1659. begin
  1660. widestringmanager.collator.setStrength(JTCollator.TERTIARY);
  1661. result:=widestringmanager.collator.compare(s1,s2);
  1662. end;
  1663. constructor TUnicodeStringManager.create;
  1664. begin
  1665. end;
  1666. procedure initunicodestringmanager;
  1667. begin
  1668. widestringmanager:=TUnicodeStringManager.create;
  1669. widestringmanager.collator:=JTCollator.getInstance;
  1670. end;