cwstring.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. LibVer: ansistring;
  31. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  32. ucnv_close: procedure (converter: PUConverter); cdecl;
  33. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  34. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  35. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  39. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  40. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  41. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  42. ucol_close: procedure (coll: PUCollator); cdecl;
  43. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  44. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  45. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  46. threadvar
  47. ThreadDataInited: boolean;
  48. DefConv, LastConv: PUConverter;
  49. LastCP: TSystemCodePage;
  50. DefColl: PUCollator;
  51. function OpenConverter(const name: ansistring): PUConverter;
  52. var
  53. err: UErrorCode;
  54. begin
  55. err:=0;
  56. Result:=ucnv_open(PAnsiChar(name), err);
  57. if Result <> nil then begin
  58. ucnv_setSubstChars(Result, '?', 1, err);
  59. ucnv_setFallback(Result, True);
  60. end;
  61. end;
  62. procedure InitThreadData;
  63. var
  64. err: UErrorCode;
  65. col: PUCollator;
  66. begin
  67. if (hlibICU = 0) or ThreadDataInited then
  68. exit;
  69. ThreadDataInited:=True;
  70. DefConv:=OpenConverter('utf8');
  71. err:=0;
  72. col:=ucol_open(nil, err);
  73. if col <> nil then
  74. ucol_setStrength(col, 2);
  75. DefColl:=col;
  76. end;
  77. function GetConverter(cp: TSystemCodePage): PUConverter;
  78. var
  79. s: ansistring;
  80. begin
  81. if hlibICU = 0 then begin
  82. Result:=nil;
  83. exit;
  84. end;
  85. InitThreadData;
  86. if (cp = CP_UTF8) or (cp = CP_ACP) then
  87. Result:=DefConv
  88. else begin
  89. if cp <> LastCP then begin
  90. Str(cp, s);
  91. LastConv:=OpenConverter('cp' + s);
  92. LastCP:=cp;
  93. end;
  94. Result:=LastConv;
  95. end;
  96. end;
  97. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  98. var
  99. len2: SizeInt;
  100. conv: PUConverter;
  101. err: UErrorCode;
  102. begin
  103. if len = 0 then begin
  104. dest:='';
  105. exit;
  106. end;
  107. conv:=GetConverter(cp);
  108. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  109. // fallback implementation
  110. DefaultUnicode2AnsiMove(source,dest,cp,len);
  111. exit;
  112. end;
  113. len2:=len*3;
  114. SetLength(dest, len2);
  115. err:=0;
  116. if conv <> nil then
  117. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  118. else begin
  119. // Use UTF-8 conversion from RTL
  120. cp:=CP_UTF8;
  121. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  122. end;
  123. if len2 > Length(dest) then begin
  124. SetLength(dest, len2);
  125. err:=0;
  126. if conv <> nil then
  127. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  128. else
  129. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  130. end;
  131. if len2 < 0 then
  132. len2:=0;
  133. SetLength(dest, len2);
  134. SetCodePage(dest, cp, False);
  135. end;
  136. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  137. var
  138. len2: SizeInt;
  139. conv: PUConverter;
  140. err: UErrorCode;
  141. begin
  142. if len = 0 then begin
  143. dest:='';
  144. exit;
  145. end;
  146. conv:=GetConverter(cp);
  147. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  148. // fallback implementation
  149. DefaultAnsi2UnicodeMove(source,cp,dest,len);
  150. exit;
  151. end;
  152. len2:=len;
  153. SetLength(dest, len2);
  154. err:=0;
  155. if conv <> nil then
  156. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  157. else
  158. // Use UTF-8 conversion from RTL
  159. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  160. if len2 > Length(dest) then begin
  161. SetLength(dest, len2);
  162. err:=0;
  163. if conv <> nil then
  164. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  165. else
  166. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  167. end;
  168. if len2 < 0 then
  169. len2:=0;
  170. SetLength(dest, len2);
  171. end;
  172. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  173. var
  174. len, len2: SizeInt;
  175. err: UErrorCode;
  176. begin
  177. if hlibICU = 0 then begin
  178. // fallback implementation
  179. Result:=UnicodeString(UpCase(AnsiString(s)));
  180. exit;
  181. end;
  182. len:=Length(s);
  183. SetLength(Result, len);
  184. if len = 0 then
  185. exit;
  186. err:=0;
  187. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  188. if len2 > len then begin
  189. SetLength(Result, len2);
  190. err:=0;
  191. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  192. end;
  193. SetLength(Result, len2);
  194. end;
  195. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  196. var
  197. len, len2: SizeInt;
  198. err: UErrorCode;
  199. begin
  200. if hlibICU = 0 then begin
  201. // fallback implementation
  202. Result:=UnicodeString(LowerCase(AnsiString(s)));
  203. exit;
  204. end;
  205. len:=Length(s);
  206. SetLength(Result, len);
  207. if len = 0 then
  208. exit;
  209. err:=0;
  210. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  211. if len2 > len then begin
  212. SetLength(Result, len2);
  213. err:=0;
  214. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  215. end;
  216. SetLength(Result, len2);
  217. end;
  218. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  219. var
  220. count, count1, count2: SizeInt;
  221. begin
  222. result := 0;
  223. Count1 := Length(S1);
  224. Count2 := Length(S2);
  225. if Count1>Count2 then
  226. Count:=Count2
  227. else
  228. Count:=Count1;
  229. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  230. if result=0 then
  231. result:=Count1 - Count2;
  232. end;
  233. function CompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  234. const
  235. U_COMPARE_CODE_POINT_ORDER = $8000;
  236. var
  237. err: UErrorCode;
  238. begin
  239. if hlibICU = 0 then begin
  240. // fallback implementation
  241. Result:=_CompareStr(s1, s2);
  242. exit;
  243. end;
  244. if (coIgnoreCase in Options) then begin
  245. err:=0;
  246. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  247. end
  248. else begin
  249. InitThreadData;
  250. if DefColl <> nil then
  251. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  252. else
  253. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  254. end;
  255. end;
  256. function UpperAnsiString(const s : AnsiString) : AnsiString;
  257. begin
  258. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  259. end;
  260. function LowerAnsiString(const s : AnsiString) : AnsiString;
  261. begin
  262. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  263. end;
  264. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  265. begin
  266. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
  267. end;
  268. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  269. begin
  270. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
  271. end;
  272. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  273. begin
  274. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
  275. end;
  276. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  277. begin
  278. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
  279. end;
  280. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  281. var
  282. as1, as2: ansistring;
  283. begin
  284. SetString(as1, S1, MaxLen);
  285. SetString(as2, S2, MaxLen);
  286. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), []);
  287. end;
  288. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  289. var
  290. as1, as2: ansistring;
  291. begin
  292. SetString(as1, S1, MaxLen);
  293. SetString(as2, S2, MaxLen);
  294. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), [coIgnoreCase]);
  295. end;
  296. function AnsiStrLower(Str: PChar): PChar;
  297. var
  298. s, res: ansistring;
  299. begin
  300. s:=Str;
  301. res:=LowerAnsiString(s);
  302. if Length(res) > Length(s) then
  303. SetLength(res, Length(s));
  304. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  305. Result:=Str;
  306. end;
  307. function AnsiStrUpper(Str: PChar): PChar;
  308. var
  309. s, res: ansistring;
  310. begin
  311. s:=Str;
  312. res:=UpperAnsiString(s);
  313. if Length(res) > Length(s) then
  314. SetLength(res, Length(s));
  315. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  316. Result:=Str;
  317. end;
  318. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  319. var
  320. c: byte;
  321. begin
  322. // Only UTF-8 encoding is supported
  323. c:=byte(Str^);
  324. if c = 0 then
  325. Result:=0
  326. else begin
  327. Result:=1;
  328. if c < $80 then
  329. exit; // 1-byte ASCII char
  330. while c and $C0 = $C0 do begin
  331. Inc(Result);
  332. c:=c shl 1;
  333. end;
  334. if Result > 6 then
  335. Result:=1 // Invalid code point
  336. else
  337. if Result > MaxLookAead then
  338. Result:=-1; // Incomplete code point
  339. end;
  340. end;
  341. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  342. begin
  343. Result := CP_UTF8; // Android always uses UTF-8
  344. end;
  345. procedure SetStdIOCodePage(var T: Text); inline;
  346. begin
  347. case TextRec(T).Mode of
  348. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  349. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  350. end;
  351. end;
  352. procedure SetStdIOCodePages; inline;
  353. begin
  354. SetStdIOCodePage(Input);
  355. SetStdIOCodePage(Output);
  356. SetStdIOCodePage(ErrOutput);
  357. SetStdIOCodePage(StdOut);
  358. SetStdIOCodePage(StdErr);
  359. end;
  360. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  361. var
  362. us: UnicodeString;
  363. begin
  364. Ansi2UnicodeMove(source,cp,us,len);
  365. dest:=us;
  366. end;
  367. function UpperWideString(const s : WideString) : WideString;
  368. begin
  369. Result:=UpperUnicodeString(s);
  370. end;
  371. function LowerWideString(const s : WideString) : WideString;
  372. begin
  373. Result:=LowerUnicodeString(s);
  374. end;
  375. function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  376. begin
  377. Result:=CompareUnicodeString(s1, s2, Options);
  378. end;
  379. Procedure SetCWideStringManager;
  380. Var
  381. CWideStringManager : TUnicodeStringManager;
  382. begin
  383. CWideStringManager:=widestringmanager;
  384. With CWideStringManager do
  385. begin
  386. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  387. Ansi2WideMoveProc:=@Ansi2WideMove;
  388. UpperWideStringProc:=@UpperWideString;
  389. LowerWideStringProc:=@LowerWideString;
  390. CompareWideStringProc:=@CompareWideString;
  391. UpperAnsiStringProc:=@UpperAnsiString;
  392. LowerAnsiStringProc:=@LowerAnsiString;
  393. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  394. CompareTextAnsiStringProc:=@AnsiCompareText;
  395. StrCompAnsiStringProc:=@StrCompAnsi;
  396. StrICompAnsiStringProc:=@AnsiStrIComp;
  397. StrLCompAnsiStringProc:=@AnsiStrLComp;
  398. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  399. StrLowerAnsiStringProc:=@AnsiStrLower;
  400. StrUpperAnsiStringProc:=@AnsiStrUpper;
  401. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  402. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  403. UpperUnicodeStringProc:=@UpperUnicodeString;
  404. LowerUnicodeStringProc:=@LowerUnicodeString;
  405. CompareUnicodeStringProc:=@CompareUnicodeString;
  406. GetStandardCodePageProc:=@GetStandardCodePage;
  407. CodePointLengthProc:=@CodePointLength;
  408. end;
  409. SetUnicodeStringManager(CWideStringManager);
  410. end;
  411. procedure UnloadICU;
  412. begin
  413. if DefColl <> nil then
  414. ucol_close(DefColl);
  415. if DefConv <> nil then
  416. ucnv_close(DefConv);
  417. if LastConv <> nil then
  418. ucnv_close(LastConv);
  419. if hlibICU <> 0 then begin
  420. UnloadLibrary(hlibICU);
  421. hlibICU:=0;
  422. end;
  423. if hlibICUi18n <> 0 then begin
  424. UnloadLibrary(hlibICUi18n);
  425. hlibICUi18n:=0;
  426. end;
  427. end;
  428. function GetIcuProc(const Name: AnsiString; out ProcPtr; libId: longint = 0): boolean; [public, alias: 'CWSTRING_GET_ICU_PROC'];
  429. var
  430. p: pointer;
  431. hLib: TLibHandle;
  432. begin
  433. Result:=False;
  434. if libId = 0 then
  435. hLib:=hlibICU
  436. else
  437. hLib:=hlibICUi18n;
  438. if hLib = 0 then
  439. exit;
  440. p:=GetProcedureAddress(hlib, Name + LibVer);
  441. if p = nil then
  442. exit;
  443. pointer(ProcPtr):=p;
  444. Result:=True;
  445. end;
  446. function LoadICU: boolean;
  447. const
  448. ICUver: array [1..9] of ansistring = ('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55');
  449. TestProcName = 'ucnv_open';
  450. var
  451. i: longint;
  452. s: ansistring;
  453. begin
  454. Result:=False;
  455. {$ifdef android}
  456. hlibICU:=LoadLibrary('libicuuc.so');
  457. hlibICUi18n:=LoadLibrary('libicui18n.so');
  458. {$else}
  459. hlibICU:=LoadLibrary('icuuc40.dll');
  460. hlibICUi18n:=LoadLibrary('icuin40.dll');
  461. LibVer:='_4_0';
  462. {$endif android}
  463. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  464. UnloadICU;
  465. exit;
  466. end;
  467. // Finding ICU version using known versions table
  468. for i:=High(ICUver) downto Low(ICUver) do begin
  469. s:='_' + ICUver[i];
  470. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  471. LibVer:=s;
  472. break;
  473. end;
  474. end;
  475. if LibVer = '' then begin
  476. // Finding unknown ICU version
  477. Val(ICUver[High(ICUver)], i);
  478. repeat
  479. Inc(i);
  480. Str(i, s);
  481. s:='_' + s;
  482. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  483. LibVer:=s;
  484. break;
  485. end;
  486. until i >= 100;
  487. end;
  488. if LibVer = '' then begin
  489. // Trying versionless name
  490. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  491. // Unable to get ICU version
  492. UnloadICU;
  493. exit;
  494. end;
  495. end;
  496. if not GetIcuProc('ucnv_open', ucnv_open) then exit;
  497. if not GetIcuProc('ucnv_close', ucnv_close) then exit;
  498. if not GetIcuProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  499. if not GetIcuProc('ucnv_setFallback', ucnv_setFallback) then exit;
  500. if not GetIcuProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  501. if not GetIcuProc('ucnv_toUChars', ucnv_toUChars) then exit;
  502. if not GetIcuProc('u_strToUpper', u_strToUpper) then exit;
  503. if not GetIcuProc('u_strToLower', u_strToLower) then exit;
  504. if not GetIcuProc('u_strCompare', u_strCompare) then exit;
  505. if not GetIcuProc('u_strCaseCompare', u_strCaseCompare) then exit;
  506. if not GetIcuProc('u_errorName', u_errorName) then exit;
  507. if not GetIcuProc('ucol_open', ucol_open, 1) then exit;
  508. if not GetIcuProc('ucol_close', ucol_close, 1) then exit;
  509. if not GetIcuProc('ucol_strcoll', ucol_strcoll, 1) then exit;
  510. if not GetIcuProc('ucol_setStrength', ucol_setStrength, 1) then exit;
  511. Result:=True;
  512. end;
  513. var
  514. oldm: TUnicodeStringManager;
  515. initialization
  516. GetUnicodeStringManager(oldm);
  517. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  518. DefaultUnicodeCodePage:=CP_UTF16;
  519. if LoadICU then begin
  520. SetCWideStringManager;
  521. {$ifdef android}
  522. SetStdIOCodePages;
  523. {$endif android}
  524. end;
  525. finalization
  526. SetUnicodeStringManager(oldm);
  527. UnloadICU;
  528. end.