cwstring.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. threadvar
  46. ThreadDataInited: boolean;
  47. DefConv, LastConv: PUConverter;
  48. LastCP: TSystemCodePage;
  49. DefColl: PUCollator;
  50. function OpenConverter(const name: ansistring): PUConverter;
  51. var
  52. err: UErrorCode;
  53. begin
  54. err:=0;
  55. Result:=ucnv_open(PAnsiChar(name), err);
  56. if Result <> nil then begin
  57. ucnv_setSubstChars(Result, '?', 1, err);
  58. ucnv_setFallback(Result, True);
  59. end;
  60. end;
  61. procedure InitThreadData;
  62. var
  63. err: UErrorCode;
  64. col: PUCollator;
  65. begin
  66. if (hlibICU = 0) or ThreadDataInited then
  67. exit;
  68. ThreadDataInited:=True;
  69. DefConv:=OpenConverter('utf8');
  70. err:=0;
  71. col:=ucol_open(nil, err);
  72. if col <> nil then
  73. ucol_setStrength(col, 2);
  74. DefColl:=col;
  75. end;
  76. function GetConverter(cp: TSystemCodePage): PUConverter;
  77. var
  78. s: ansistring;
  79. begin
  80. if hlibICU = 0 then begin
  81. Result:=nil;
  82. exit;
  83. end;
  84. InitThreadData;
  85. if (cp = CP_UTF8) or (cp = CP_ACP) then
  86. Result:=DefConv
  87. else begin
  88. if cp <> LastCP then begin
  89. Str(cp, s);
  90. LastConv:=OpenConverter('cp' + s);
  91. LastCP:=cp;
  92. end;
  93. Result:=LastConv;
  94. end;
  95. end;
  96. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  97. var
  98. len2: SizeInt;
  99. conv: PUConverter;
  100. err: UErrorCode;
  101. begin
  102. if len = 0 then begin
  103. dest:='';
  104. exit;
  105. end;
  106. conv:=GetConverter(cp);
  107. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  108. // fallback implementation
  109. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  110. exit;
  111. end;
  112. len2:=len*3;
  113. SetLength(dest, len2);
  114. err:=0;
  115. if conv <> nil then
  116. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  117. else begin
  118. // Use UTF-8 conversion from RTL
  119. cp:=CP_UTF8;
  120. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  121. end;
  122. if len2 > Length(dest) then begin
  123. SetLength(dest, len2);
  124. err:=0;
  125. if conv <> nil then
  126. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  127. else
  128. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  129. end;
  130. if len2 < 0 then
  131. len2:=0;
  132. SetLength(dest, len2);
  133. SetCodePage(dest, cp, False);
  134. end;
  135. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  136. var
  137. len2: SizeInt;
  138. conv: PUConverter;
  139. err: UErrorCode;
  140. begin
  141. if len = 0 then begin
  142. dest:='';
  143. exit;
  144. end;
  145. conv:=GetConverter(cp);
  146. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  147. // fallback implementation
  148. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  149. exit;
  150. end;
  151. len2:=len;
  152. SetLength(dest, len2);
  153. err:=0;
  154. if conv <> nil then
  155. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  156. else
  157. // Use UTF-8 conversion from RTL
  158. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  159. if len2 > Length(dest) then begin
  160. SetLength(dest, len2);
  161. err:=0;
  162. if conv <> nil then
  163. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  164. else
  165. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  166. end;
  167. if len2 < 0 then
  168. len2:=0;
  169. SetLength(dest, len2);
  170. end;
  171. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  172. var
  173. len, len2: SizeInt;
  174. err: UErrorCode;
  175. begin
  176. if hlibICU = 0 then begin
  177. // fallback implementation
  178. Result:=UnicodeString(UpCase(AnsiString(s)));
  179. exit;
  180. end;
  181. len:=Length(s);
  182. SetLength(Result, len);
  183. if len = 0 then
  184. exit;
  185. err:=0;
  186. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  187. if len2 > len then begin
  188. SetLength(Result, len2);
  189. err:=0;
  190. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  191. end;
  192. SetLength(Result, len2);
  193. end;
  194. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  195. var
  196. len, len2: SizeInt;
  197. err: UErrorCode;
  198. begin
  199. if hlibICU = 0 then begin
  200. // fallback implementation
  201. Result:=UnicodeString(LowerCase(AnsiString(s)));
  202. exit;
  203. end;
  204. len:=Length(s);
  205. SetLength(Result, len);
  206. if len = 0 then
  207. exit;
  208. err:=0;
  209. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  210. if len2 > len then begin
  211. SetLength(Result, len2);
  212. err:=0;
  213. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  214. end;
  215. SetLength(Result, len2);
  216. end;
  217. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  218. var
  219. count, count1, count2: SizeInt;
  220. begin
  221. result := 0;
  222. Count1 := Length(S1);
  223. Count2 := Length(S2);
  224. if Count1>Count2 then
  225. Count:=Count2
  226. else
  227. Count:=Count1;
  228. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  229. if result=0 then
  230. result:=Count1 - Count2;
  231. end;
  232. function CompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  233. begin
  234. if hlibICU = 0 then begin
  235. // fallback implementation
  236. Result:=_CompareStr(s1, s2);
  237. exit;
  238. end;
  239. InitThreadData;
  240. if (coIgnoreCase in Options) then
  241. u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  242. else
  243. if DefColl <> nil then
  244. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  245. else
  246. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  247. end;
  248. function CompareTextUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions): PtrInt;
  249. const
  250. U_COMPARE_CODE_POINT_ORDER = $8000;
  251. var
  252. err: UErrorCode;
  253. begin
  254. if hlibICU = 0 then begin
  255. // fallback implementation
  256. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  257. exit;
  258. end;
  259. err:=0;
  260. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  261. end;
  262. function UpperAnsiString(const s : AnsiString) : AnsiString;
  263. begin
  264. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  265. end;
  266. function LowerAnsiString(const s : AnsiString) : AnsiString;
  267. begin
  268. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  269. end;
  270. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  271. begin
  272. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  273. end;
  274. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  275. begin
  276. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  277. end;
  278. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  279. begin
  280. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  281. end;
  282. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  283. begin
  284. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  285. end;
  286. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  287. var
  288. as1, as2: ansistring;
  289. begin
  290. SetString(as1, S1, MaxLen);
  291. SetString(as2, S2, MaxLen);
  292. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2));
  293. end;
  294. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  295. var
  296. as1, as2: ansistring;
  297. begin
  298. SetString(as1, S1, MaxLen);
  299. SetString(as2, S2, MaxLen);
  300. Result:=CompareTextUnicodeString(UnicodeString(as1), UnicodeString(as2));
  301. end;
  302. function AnsiStrLower(Str: PChar): PChar;
  303. var
  304. s, res: ansistring;
  305. begin
  306. s:=Str;
  307. res:=LowerAnsiString(s);
  308. if Length(res) > Length(s) then
  309. SetLength(res, Length(s));
  310. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  311. Result:=Str;
  312. end;
  313. function AnsiStrUpper(Str: PChar): PChar;
  314. var
  315. s, res: ansistring;
  316. begin
  317. s:=Str;
  318. res:=UpperAnsiString(s);
  319. if Length(res) > Length(s) then
  320. SetLength(res, Length(s));
  321. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  322. Result:=Str;
  323. end;
  324. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  325. var
  326. c: byte;
  327. begin
  328. // Only UTF-8 encoding is supported
  329. c:=byte(Str^);
  330. if c = 0 then
  331. Result:=0
  332. else begin
  333. Result:=1;
  334. if c < $80 then
  335. exit; // 1-byte ASCII char
  336. while c and $C0 = $C0 do begin
  337. Inc(Result);
  338. c:=c shl 1;
  339. end;
  340. if Result > 6 then
  341. Result:=1 // Invalid code point
  342. else
  343. if Result > MaxLookAead then
  344. Result:=-1; // Incomplete code point
  345. end;
  346. end;
  347. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  348. begin
  349. Result := CP_UTF8; // Android always uses UTF-8
  350. end;
  351. procedure SetStdIOCodePage(var T: Text); inline;
  352. begin
  353. case TextRec(T).Mode of
  354. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  355. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  356. end;
  357. end;
  358. procedure SetStdIOCodePages; inline;
  359. begin
  360. SetStdIOCodePage(Input);
  361. SetStdIOCodePage(Output);
  362. SetStdIOCodePage(ErrOutput);
  363. SetStdIOCodePage(StdOut);
  364. SetStdIOCodePage(StdErr);
  365. end;
  366. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  367. var
  368. us: UnicodeString;
  369. begin
  370. Ansi2UnicodeMove(source,cp,us,len);
  371. dest:=us;
  372. end;
  373. function UpperWideString(const s : WideString) : WideString;
  374. begin
  375. Result:=UpperUnicodeString(s);
  376. end;
  377. function LowerWideString(const s : WideString) : WideString;
  378. begin
  379. Result:=LowerUnicodeString(s);
  380. end;
  381. function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  382. begin
  383. Result:=CompareUnicodeString(s1, s2, Options);
  384. end;
  385. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  386. begin
  387. Result:=CompareTextUnicodeString(s1, s2);
  388. end;
  389. Procedure SetCWideStringManager;
  390. Var
  391. CWideStringManager : TUnicodeStringManager;
  392. begin
  393. CWideStringManager:=widestringmanager;
  394. With CWideStringManager do
  395. begin
  396. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  397. Ansi2WideMoveProc:=@Ansi2WideMove;
  398. UpperWideStringProc:=@UpperWideString;
  399. LowerWideStringProc:=@LowerWideString;
  400. CompareWideStringProc:=@CompareWideString;
  401. UpperAnsiStringProc:=@UpperAnsiString;
  402. LowerAnsiStringProc:=@LowerAnsiString;
  403. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  404. CompareTextAnsiStringProc:=@AnsiCompareText;
  405. StrCompAnsiStringProc:=@StrCompAnsi;
  406. StrICompAnsiStringProc:=@AnsiStrIComp;
  407. StrLCompAnsiStringProc:=@AnsiStrLComp;
  408. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  409. StrLowerAnsiStringProc:=@AnsiStrLower;
  410. StrUpperAnsiStringProc:=@AnsiStrUpper;
  411. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  412. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  413. UpperUnicodeStringProc:=@UpperUnicodeString;
  414. LowerUnicodeStringProc:=@LowerUnicodeString;
  415. CompareUnicodeStringProc:=@CompareUnicodeString;
  416. GetStandardCodePageProc:=@GetStandardCodePage;
  417. CodePointLengthProc:=@CodePointLength;
  418. end;
  419. SetUnicodeStringManager(CWideStringManager);
  420. end;
  421. procedure UnloadICU;
  422. begin
  423. if hlibICUi18n <> 0 then begin
  424. if DefColl <> nil then
  425. ucol_close(DefColl);
  426. UnloadLibrary(hlibICUi18n);
  427. hlibICUi18n:=0;
  428. end;
  429. if hlibICU <> 0 then begin
  430. if DefConv <> nil then
  431. ucnv_close(DefConv);
  432. if LastConv <> nil then
  433. ucnv_close(LastConv);
  434. UnloadLibrary(hlibICU);
  435. hlibICU:=0;
  436. end;
  437. end;
  438. procedure LoadICU;
  439. var
  440. LibVer: ansistring;
  441. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  442. var
  443. p: pointer;
  444. begin
  445. if hLib = 0 then
  446. hLib:=hlibICU;
  447. p:=GetProcedureAddress(hlib, Name + LibVer);
  448. if p = nil then begin
  449. // unload lib on failure
  450. UnloadICU;
  451. Result:=False;
  452. end
  453. else begin
  454. pointer(ProcPtr):=p;
  455. Result:=True;
  456. end;
  457. end;
  458. const
  459. ICUver: array [1..9] of ansistring = ('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55');
  460. TestProcName = 'ucnv_open';
  461. var
  462. i: longint;
  463. s: ansistring;
  464. begin
  465. hlibICU:=LoadLibrary('libicuuc.so');
  466. hlibICUi18n:=LoadLibrary('libicui18n.so');
  467. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  468. UnloadICU;
  469. exit;
  470. end;
  471. // Finding ICU version using known versions table
  472. for i:=High(ICUver) downto Low(ICUver) do begin
  473. s:='_' + ICUver[i];
  474. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  475. LibVer:=s;
  476. break;
  477. end;
  478. end;
  479. if LibVer = '' then begin
  480. // Finding unknown ICU version
  481. Val(ICUver[High(ICUver)], i);
  482. repeat
  483. Inc(i);
  484. Str(i, s);
  485. s:='_' + s;
  486. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  487. LibVer:=s;
  488. break;
  489. end;
  490. until i >= 100;
  491. end;
  492. if LibVer = '' then begin
  493. // Trying versionless name
  494. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  495. // Unable to get ICU version
  496. UnloadICU;
  497. exit;
  498. end;
  499. end;
  500. if not _GetProc('ucnv_open', ucnv_open) then exit;
  501. if not _GetProc('ucnv_close', ucnv_close) then exit;
  502. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  503. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  504. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  505. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  506. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  507. if not _GetProc('u_strToLower', u_strToLower) then exit;
  508. if not _GetProc('u_strCompare', u_strCompare) then exit;
  509. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  510. if not _GetProc('u_errorName', u_errorName) then exit;
  511. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  512. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  513. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  514. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  515. end;
  516. initialization
  517. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  518. DefaultUnicodeCodePage:=CP_UTF16;
  519. LoadICU;
  520. SetCWideStringManager;
  521. SetStdIOCodePages;
  522. finalization
  523. UnloadICU;
  524. end.