cwstring.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. threadvar
  46. ThreadDataInited: boolean;
  47. DefConv, LastConv: PUConverter;
  48. LastCP: TSystemCodePage;
  49. DefColl: PUCollator;
  50. function OpenConverter(const name: ansistring): PUConverter;
  51. var
  52. err: UErrorCode;
  53. begin
  54. err:=0;
  55. Result:=ucnv_open(PAnsiChar(name), err);
  56. if Result <> nil then begin
  57. ucnv_setSubstChars(Result, '?', 1, err);
  58. ucnv_setFallback(Result, True);
  59. end;
  60. end;
  61. procedure InitThreadData;
  62. var
  63. err: UErrorCode;
  64. col: PUCollator;
  65. begin
  66. if (hlibICU = 0) or ThreadDataInited then
  67. exit;
  68. ThreadDataInited:=True;
  69. DefConv:=OpenConverter('utf8');
  70. err:=0;
  71. col:=ucol_open(nil, err);
  72. if col <> nil then
  73. ucol_setStrength(col, 2);
  74. DefColl:=col;
  75. end;
  76. function GetConverter(cp: TSystemCodePage): PUConverter;
  77. var
  78. s: ansistring;
  79. begin
  80. if hlibICU = 0 then begin
  81. Result:=nil;
  82. exit;
  83. end;
  84. InitThreadData;
  85. if (cp = CP_UTF8) or (cp = CP_ACP) then
  86. Result:=DefConv
  87. else begin
  88. if cp <> LastCP then begin
  89. Str(cp, s);
  90. LastConv:=OpenConverter('cp' + s);
  91. LastCP:=cp;
  92. end;
  93. Result:=LastConv;
  94. end;
  95. end;
  96. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  97. var
  98. len2: SizeInt;
  99. conv: PUConverter;
  100. err: UErrorCode;
  101. begin
  102. if len = 0 then begin
  103. dest:='';
  104. exit;
  105. end;
  106. conv:=GetConverter(cp);
  107. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  108. // fallback implementation
  109. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  110. exit;
  111. end;
  112. len2:=len*3;
  113. SetLength(dest, len2);
  114. err:=0;
  115. if conv <> nil then
  116. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  117. else begin
  118. // Use UTF-8 conversion from RTL
  119. cp:=CP_UTF8;
  120. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  121. end;
  122. if len2 > Length(dest) then begin
  123. SetLength(dest, len2);
  124. err:=0;
  125. if conv <> nil then
  126. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  127. else
  128. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  129. end;
  130. if len2 < 0 then
  131. len2:=0;
  132. SetLength(dest, len2);
  133. SetCodePage(dest, cp, False);
  134. end;
  135. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  136. var
  137. len2: SizeInt;
  138. conv: PUConverter;
  139. err: UErrorCode;
  140. begin
  141. if len = 0 then begin
  142. dest:='';
  143. exit;
  144. end;
  145. conv:=GetConverter(cp);
  146. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  147. // fallback implementation
  148. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  149. exit;
  150. end;
  151. len2:=len;
  152. SetLength(dest, len2);
  153. err:=0;
  154. if conv <> nil then
  155. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  156. else
  157. // Use UTF-8 conversion from RTL
  158. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  159. if len2 > Length(dest) then begin
  160. SetLength(dest, len2);
  161. err:=0;
  162. if conv <> nil then
  163. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  164. else
  165. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  166. end;
  167. if len2 < 0 then
  168. len2:=0;
  169. SetLength(dest, len2);
  170. end;
  171. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  172. var
  173. len, len2: SizeInt;
  174. err: UErrorCode;
  175. begin
  176. if hlibICU = 0 then begin
  177. // fallback implementation
  178. Result:=UnicodeString(UpCase(AnsiString(s)));
  179. exit;
  180. end;
  181. len:=Length(s);
  182. SetLength(Result, len);
  183. if len = 0 then
  184. exit;
  185. err:=0;
  186. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  187. if len2 > len then begin
  188. SetLength(Result, len2);
  189. err:=0;
  190. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  191. end;
  192. SetLength(Result, len2);
  193. end;
  194. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  195. var
  196. len, len2: SizeInt;
  197. err: UErrorCode;
  198. begin
  199. if hlibICU = 0 then begin
  200. // fallback implementation
  201. Result:=UnicodeString(LowerCase(AnsiString(s)));
  202. exit;
  203. end;
  204. len:=Length(s);
  205. SetLength(Result, len);
  206. if len = 0 then
  207. exit;
  208. err:=0;
  209. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  210. if len2 > len then begin
  211. SetLength(Result, len2);
  212. err:=0;
  213. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  214. end;
  215. SetLength(Result, len2);
  216. end;
  217. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  218. var
  219. count, count1, count2: SizeInt;
  220. begin
  221. result := 0;
  222. Count1 := Length(S1);
  223. Count2 := Length(S2);
  224. if Count1>Count2 then
  225. Count:=Count2
  226. else
  227. Count:=Count1;
  228. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  229. if result=0 then
  230. result:=Count1 - Count2;
  231. end;
  232. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  233. begin
  234. if hlibICU = 0 then begin
  235. // fallback implementation
  236. Result:=_CompareStr(s1, s2);
  237. exit;
  238. end;
  239. InitThreadData;
  240. if DefColl <> nil then
  241. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  242. else
  243. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  244. end;
  245. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  246. const
  247. U_COMPARE_CODE_POINT_ORDER = $8000;
  248. var
  249. err: UErrorCode;
  250. begin
  251. if hlibICU = 0 then begin
  252. // fallback implementation
  253. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  254. exit;
  255. end;
  256. err:=0;
  257. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  258. end;
  259. function UpperAnsiString(const s : AnsiString) : AnsiString;
  260. begin
  261. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  262. end;
  263. function LowerAnsiString(const s : AnsiString) : AnsiString;
  264. begin
  265. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  266. end;
  267. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  268. begin
  269. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  270. end;
  271. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  272. begin
  273. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  274. end;
  275. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  276. begin
  277. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  278. end;
  279. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  280. begin
  281. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  282. end;
  283. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  284. var
  285. as1, as2: ansistring;
  286. begin
  287. SetString(as1, S1, MaxLen);
  288. SetString(as2, S2, MaxLen);
  289. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2));
  290. end;
  291. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  292. var
  293. as1, as2: ansistring;
  294. begin
  295. SetString(as1, S1, MaxLen);
  296. SetString(as2, S2, MaxLen);
  297. Result:=CompareTextUnicodeString(UnicodeString(as1), UnicodeString(as2));
  298. end;
  299. function AnsiStrLower(Str: PChar): PChar;
  300. var
  301. s, res: ansistring;
  302. begin
  303. s:=Str;
  304. res:=LowerAnsiString(s);
  305. if Length(res) > Length(s) then
  306. SetLength(res, Length(s));
  307. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  308. Result:=Str;
  309. end;
  310. function AnsiStrUpper(Str: PChar): PChar;
  311. var
  312. s, res: ansistring;
  313. begin
  314. s:=Str;
  315. res:=UpperAnsiString(s);
  316. if Length(res) > Length(s) then
  317. SetLength(res, Length(s));
  318. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  319. Result:=Str;
  320. end;
  321. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  322. var
  323. c: byte;
  324. begin
  325. // Only UTF-8 encoding is supported
  326. c:=byte(Str^);
  327. if c = 0 then
  328. Result:=0
  329. else begin
  330. Result:=1;
  331. if c < $80 then
  332. exit; // 1-byte ASCII char
  333. while c and $C0 = $C0 do begin
  334. Inc(Result);
  335. c:=c shl 1;
  336. end;
  337. if Result > 6 then
  338. Result:=1 // Invalid code point
  339. else
  340. if Result > MaxLookAead then
  341. Result:=-1; // Incomplete code point
  342. end;
  343. end;
  344. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  345. begin
  346. Result := CP_UTF8; // Android always uses UTF-8
  347. end;
  348. procedure SetStdIOCodePage(var T: Text); inline;
  349. begin
  350. case TextRec(T).Mode of
  351. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  352. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  353. end;
  354. end;
  355. procedure SetStdIOCodePages; inline;
  356. begin
  357. SetStdIOCodePage(Input);
  358. SetStdIOCodePage(Output);
  359. SetStdIOCodePage(ErrOutput);
  360. SetStdIOCodePage(StdOut);
  361. SetStdIOCodePage(StdErr);
  362. end;
  363. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  364. var
  365. us: UnicodeString;
  366. begin
  367. Ansi2UnicodeMove(source,cp,us,len);
  368. dest:=us;
  369. end;
  370. function UpperWideString(const s : WideString) : WideString;
  371. begin
  372. Result:=UpperUnicodeString(s);
  373. end;
  374. function LowerWideString(const s : WideString) : WideString;
  375. begin
  376. Result:=LowerUnicodeString(s);
  377. end;
  378. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  379. begin
  380. Result:=CompareUnicodeString(s1, s2);
  381. end;
  382. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  383. begin
  384. Result:=CompareTextUnicodeString(s1, s2);
  385. end;
  386. Procedure SetCWideStringManager;
  387. Var
  388. CWideStringManager : TUnicodeStringManager;
  389. begin
  390. CWideStringManager:=widestringmanager;
  391. With CWideStringManager do
  392. begin
  393. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  394. Ansi2WideMoveProc:=@Ansi2WideMove;
  395. UpperWideStringProc:=@UpperWideString;
  396. LowerWideStringProc:=@LowerWideString;
  397. CompareWideStringProc:=@CompareWideString;
  398. CompareTextWideStringProc:=@CompareTextWideString;
  399. UpperAnsiStringProc:=@UpperAnsiString;
  400. LowerAnsiStringProc:=@LowerAnsiString;
  401. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  402. CompareTextAnsiStringProc:=@AnsiCompareText;
  403. StrCompAnsiStringProc:=@StrCompAnsi;
  404. StrICompAnsiStringProc:=@AnsiStrIComp;
  405. StrLCompAnsiStringProc:=@AnsiStrLComp;
  406. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  407. StrLowerAnsiStringProc:=@AnsiStrLower;
  408. StrUpperAnsiStringProc:=@AnsiStrUpper;
  409. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  410. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  411. UpperUnicodeStringProc:=@UpperUnicodeString;
  412. LowerUnicodeStringProc:=@LowerUnicodeString;
  413. CompareUnicodeStringProc:=@CompareUnicodeString;
  414. CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  415. GetStandardCodePageProc:=@GetStandardCodePage;
  416. CodePointLengthProc:=@CodePointLength;
  417. end;
  418. SetUnicodeStringManager(CWideStringManager);
  419. end;
  420. procedure UnloadICU;
  421. begin
  422. if hlibICUi18n <> 0 then begin
  423. if DefColl <> nil then
  424. ucol_close(DefColl);
  425. UnloadLibrary(hlibICUi18n);
  426. hlibICUi18n:=0;
  427. end;
  428. if hlibICU <> 0 then begin
  429. if DefConv <> nil then
  430. ucnv_close(DefConv);
  431. if LastConv <> nil then
  432. ucnv_close(LastConv);
  433. UnloadLibrary(hlibICU);
  434. hlibICU:=0;
  435. end;
  436. end;
  437. procedure LoadICU;
  438. var
  439. LibVer: ansistring;
  440. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  441. var
  442. p: pointer;
  443. begin
  444. if hLib = 0 then
  445. hLib:=hlibICU;
  446. p:=GetProcedureAddress(hlib, Name + LibVer);
  447. if p = nil then begin
  448. // unload lib on failure
  449. UnloadICU;
  450. Result:=False;
  451. end
  452. else begin
  453. pointer(ProcPtr):=p;
  454. Result:=True;
  455. end;
  456. end;
  457. const
  458. ICUver: array [1..9] of ansistring = ('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55');
  459. TestProcName = 'ucnv_open';
  460. var
  461. i: longint;
  462. s: ansistring;
  463. begin
  464. hlibICU:=LoadLibrary('libicuuc.so');
  465. hlibICUi18n:=LoadLibrary('libicui18n.so');
  466. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  467. UnloadICU;
  468. exit;
  469. end;
  470. // Finding ICU version using known versions table
  471. for i:=High(ICUver) downto Low(ICUver) do begin
  472. s:='_' + ICUver[i];
  473. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  474. LibVer:=s;
  475. break;
  476. end;
  477. end;
  478. if LibVer = '' then begin
  479. // Finding unknown ICU version
  480. Val(ICUver[High(ICUver)], i);
  481. repeat
  482. Inc(i);
  483. Str(i, s);
  484. s:='_' + s;
  485. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  486. LibVer:=s;
  487. break;
  488. end;
  489. until i >= 100;
  490. end;
  491. if LibVer = '' then begin
  492. // Trying versionless name
  493. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  494. // Unable to get ICU version
  495. UnloadICU;
  496. exit;
  497. end;
  498. end;
  499. if not _GetProc('ucnv_open', ucnv_open) then exit;
  500. if not _GetProc('ucnv_close', ucnv_close) then exit;
  501. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  502. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  503. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  504. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  505. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  506. if not _GetProc('u_strToLower', u_strToLower) then exit;
  507. if not _GetProc('u_strCompare', u_strCompare) then exit;
  508. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  509. if not _GetProc('u_errorName', u_errorName) then exit;
  510. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  511. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  512. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  513. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  514. end;
  515. initialization
  516. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  517. DefaultUnicodeCodePage:=CP_UTF16;
  518. LoadICU;
  519. SetCWideStringManager;
  520. SetStdIOCodePages;
  521. finalization
  522. UnloadICU;
  523. end.