cwstring.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. threadvar
  46. ThreadDataInited: boolean;
  47. DefConv, LastConv: PUConverter;
  48. LastCP: TSystemCodePage;
  49. DefColl: PUCollator;
  50. function OpenConverter(const name: ansistring): PUConverter;
  51. var
  52. err: UErrorCode;
  53. begin
  54. err:=0;
  55. Result:=ucnv_open(PAnsiChar(name), err);
  56. if Result <> nil then begin
  57. ucnv_setSubstChars(Result, '?', 1, err);
  58. ucnv_setFallback(Result, True);
  59. end;
  60. end;
  61. procedure InitThreadData;
  62. var
  63. err: UErrorCode;
  64. col: PUCollator;
  65. begin
  66. if (hlibICU = 0) or ThreadDataInited then
  67. exit;
  68. ThreadDataInited:=True;
  69. DefConv:=OpenConverter('utf8');
  70. err:=0;
  71. col:=ucol_open(nil, err);
  72. if col <> nil then
  73. ucol_setStrength(col, 2);
  74. DefColl:=col;
  75. end;
  76. function GetConverter(cp: TSystemCodePage): PUConverter;
  77. var
  78. s: ansistring;
  79. begin
  80. if hlibICU = 0 then begin
  81. Result:=nil;
  82. exit;
  83. end;
  84. InitThreadData;
  85. if (cp = CP_UTF8) or (cp = CP_ACP) then
  86. Result:=DefConv
  87. else begin
  88. if cp <> LastCP then begin
  89. Str(cp, s);
  90. LastConv:=OpenConverter('cp' + s);
  91. LastCP:=cp;
  92. end;
  93. Result:=LastConv;
  94. end;
  95. end;
  96. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  97. var
  98. len2: SizeInt;
  99. conv: PUConverter;
  100. err: UErrorCode;
  101. begin
  102. if len = 0 then begin
  103. dest:='';
  104. exit;
  105. end;
  106. conv:=GetConverter(cp);
  107. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  108. // fallback implementation
  109. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  110. exit;
  111. end;
  112. len2:=len*3;
  113. SetLength(dest, len2);
  114. err:=0;
  115. if conv <> nil then
  116. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  117. else begin
  118. // Use UTF-8 conversion from RTL
  119. cp:=CP_UTF8;
  120. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  121. end;
  122. if len2 > Length(dest) then begin
  123. SetLength(dest, len2);
  124. err:=0;
  125. if conv <> nil then
  126. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  127. else
  128. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  129. end;
  130. if len2 < 0 then
  131. len2:=0;
  132. SetLength(dest, len2);
  133. SetCodePage(dest, cp, False);
  134. end;
  135. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  136. var
  137. len2: SizeInt;
  138. conv: PUConverter;
  139. err: UErrorCode;
  140. begin
  141. if len = 0 then begin
  142. dest:='';
  143. exit;
  144. end;
  145. conv:=GetConverter(cp);
  146. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  147. // fallback implementation
  148. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  149. exit;
  150. end;
  151. len2:=len;
  152. SetLength(dest, len2);
  153. err:=0;
  154. if conv <> nil then
  155. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  156. else
  157. // Use UTF-8 conversion from RTL
  158. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  159. if len2 > Length(dest) then begin
  160. SetLength(dest, len2);
  161. err:=0;
  162. if conv <> nil then
  163. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  164. else
  165. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  166. end;
  167. if len2 < 0 then
  168. len2:=0;
  169. SetLength(dest, len2);
  170. end;
  171. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  172. var
  173. len, len2: SizeInt;
  174. err: UErrorCode;
  175. begin
  176. if hlibICU = 0 then begin
  177. // fallback implementation
  178. Result:=UnicodeString(UpCase(AnsiString(s)));
  179. exit;
  180. end;
  181. len:=Length(s);
  182. SetLength(Result, len);
  183. if len = 0 then
  184. exit;
  185. err:=0;
  186. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  187. if len2 > len then begin
  188. SetLength(Result, len2);
  189. err:=0;
  190. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  191. end;
  192. SetLength(Result, len2);
  193. end;
  194. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  195. var
  196. len, len2: SizeInt;
  197. err: UErrorCode;
  198. begin
  199. if hlibICU = 0 then begin
  200. // fallback implementation
  201. Result:=UnicodeString(LowerCase(AnsiString(s)));
  202. exit;
  203. end;
  204. len:=Length(s);
  205. SetLength(Result, len);
  206. if len = 0 then
  207. exit;
  208. err:=0;
  209. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  210. if len2 > len then begin
  211. SetLength(Result, len2);
  212. err:=0;
  213. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  214. end;
  215. SetLength(Result, len2);
  216. end;
  217. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  218. var
  219. count, count1, count2: SizeInt;
  220. begin
  221. result := 0;
  222. Count1 := Length(S1);
  223. Count2 := Length(S2);
  224. if Count1>Count2 then
  225. Count:=Count2
  226. else
  227. Count:=Count1;
  228. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  229. if result=0 then
  230. result:=Count1 - Count2;
  231. end;
  232. function CompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  233. const
  234. U_COMPARE_CODE_POINT_ORDER = $8000;
  235. var
  236. err: UErrorCode;
  237. begin
  238. if hlibICU = 0 then begin
  239. // fallback implementation
  240. Result:=_CompareStr(s1, s2);
  241. exit;
  242. end;
  243. InitThreadData;
  244. if (coIgnoreCase in Options) then
  245. u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err)
  246. else
  247. if DefColl <> nil then
  248. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  249. else
  250. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  251. end;
  252. function CompareTextUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions): PtrInt;
  253. const
  254. U_COMPARE_CODE_POINT_ORDER = $8000;
  255. var
  256. err: UErrorCode;
  257. begin
  258. if hlibICU = 0 then begin
  259. // fallback implementation
  260. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  261. exit;
  262. end;
  263. err:=0;
  264. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  265. end;
  266. function UpperAnsiString(const s : AnsiString) : AnsiString;
  267. begin
  268. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  269. end;
  270. function LowerAnsiString(const s : AnsiString) : AnsiString;
  271. begin
  272. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  273. end;
  274. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  275. begin
  276. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2),[]);
  277. end;
  278. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  279. begin
  280. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2),[]);
  281. end;
  282. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  283. begin
  284. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2),[coIgnoreCase]);
  285. end;
  286. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  287. begin
  288. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2),[coIgnoreCase]);
  289. end;
  290. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  291. var
  292. as1, as2: ansistring;
  293. begin
  294. SetString(as1, S1, MaxLen);
  295. SetString(as2, S2, MaxLen);
  296. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2),[]);
  297. end;
  298. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  299. var
  300. as1, as2: ansistring;
  301. begin
  302. SetString(as1, S1, MaxLen);
  303. SetString(as2, S2, MaxLen);
  304. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2),[coIgnoreCase]);
  305. end;
  306. function AnsiStrLower(Str: PChar): PChar;
  307. var
  308. s, res: ansistring;
  309. begin
  310. s:=Str;
  311. res:=LowerAnsiString(s);
  312. if Length(res) > Length(s) then
  313. SetLength(res, Length(s));
  314. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  315. Result:=Str;
  316. end;
  317. function AnsiStrUpper(Str: PChar): PChar;
  318. var
  319. s, res: ansistring;
  320. begin
  321. s:=Str;
  322. res:=UpperAnsiString(s);
  323. if Length(res) > Length(s) then
  324. SetLength(res, Length(s));
  325. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  326. Result:=Str;
  327. end;
  328. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  329. var
  330. c: byte;
  331. begin
  332. // Only UTF-8 encoding is supported
  333. c:=byte(Str^);
  334. if c = 0 then
  335. Result:=0
  336. else begin
  337. Result:=1;
  338. if c < $80 then
  339. exit; // 1-byte ASCII char
  340. while c and $C0 = $C0 do begin
  341. Inc(Result);
  342. c:=c shl 1;
  343. end;
  344. if Result > 6 then
  345. Result:=1 // Invalid code point
  346. else
  347. if Result > MaxLookAead then
  348. Result:=-1; // Incomplete code point
  349. end;
  350. end;
  351. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  352. begin
  353. Result := CP_UTF8; // Android always uses UTF-8
  354. end;
  355. procedure SetStdIOCodePage(var T: Text); inline;
  356. begin
  357. case TextRec(T).Mode of
  358. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  359. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  360. end;
  361. end;
  362. procedure SetStdIOCodePages; inline;
  363. begin
  364. SetStdIOCodePage(Input);
  365. SetStdIOCodePage(Output);
  366. SetStdIOCodePage(ErrOutput);
  367. SetStdIOCodePage(StdOut);
  368. SetStdIOCodePage(StdErr);
  369. end;
  370. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  371. var
  372. us: UnicodeString;
  373. begin
  374. Ansi2UnicodeMove(source,cp,us,len);
  375. dest:=us;
  376. end;
  377. function UpperWideString(const s : WideString) : WideString;
  378. begin
  379. Result:=UpperUnicodeString(s);
  380. end;
  381. function LowerWideString(const s : WideString) : WideString;
  382. begin
  383. Result:=LowerUnicodeString(s);
  384. end;
  385. function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  386. begin
  387. Result:=CompareUnicodeString(s1, s2, Options);
  388. end;
  389. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  390. begin
  391. Result:=CompareTextUnicodeString(s1, s2,[coIgnoreCase]);
  392. end;
  393. Procedure SetCWideStringManager;
  394. Var
  395. CWideStringManager : TUnicodeStringManager;
  396. begin
  397. CWideStringManager:=widestringmanager;
  398. With CWideStringManager do
  399. begin
  400. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  401. Ansi2WideMoveProc:=@Ansi2WideMove;
  402. UpperWideStringProc:=@UpperWideString;
  403. LowerWideStringProc:=@LowerWideString;
  404. CompareWideStringProc:=@CompareWideString;
  405. UpperAnsiStringProc:=@UpperAnsiString;
  406. LowerAnsiStringProc:=@LowerAnsiString;
  407. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  408. CompareTextAnsiStringProc:=@AnsiCompareText;
  409. StrCompAnsiStringProc:=@StrCompAnsi;
  410. StrICompAnsiStringProc:=@AnsiStrIComp;
  411. StrLCompAnsiStringProc:=@AnsiStrLComp;
  412. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  413. StrLowerAnsiStringProc:=@AnsiStrLower;
  414. StrUpperAnsiStringProc:=@AnsiStrUpper;
  415. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  416. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  417. UpperUnicodeStringProc:=@UpperUnicodeString;
  418. LowerUnicodeStringProc:=@LowerUnicodeString;
  419. CompareUnicodeStringProc:=@CompareUnicodeString;
  420. GetStandardCodePageProc:=@GetStandardCodePage;
  421. CodePointLengthProc:=@CodePointLength;
  422. end;
  423. SetUnicodeStringManager(CWideStringManager);
  424. end;
  425. procedure UnloadICU;
  426. begin
  427. if hlibICUi18n <> 0 then begin
  428. if DefColl <> nil then
  429. ucol_close(DefColl);
  430. UnloadLibrary(hlibICUi18n);
  431. hlibICUi18n:=0;
  432. end;
  433. if hlibICU <> 0 then begin
  434. if DefConv <> nil then
  435. ucnv_close(DefConv);
  436. if LastConv <> nil then
  437. ucnv_close(LastConv);
  438. UnloadLibrary(hlibICU);
  439. hlibICU:=0;
  440. end;
  441. end;
  442. procedure LoadICU;
  443. var
  444. LibVer: ansistring;
  445. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  446. var
  447. p: pointer;
  448. begin
  449. if hLib = 0 then
  450. hLib:=hlibICU;
  451. p:=GetProcedureAddress(hlib, Name + LibVer);
  452. if p = nil then begin
  453. // unload lib on failure
  454. UnloadICU;
  455. Result:=False;
  456. end
  457. else begin
  458. pointer(ProcPtr):=p;
  459. Result:=True;
  460. end;
  461. end;
  462. const
  463. ICUver: array [1..9] of ansistring = ('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55');
  464. TestProcName = 'ucnv_open';
  465. var
  466. i: longint;
  467. s: ansistring;
  468. begin
  469. hlibICU:=LoadLibrary('libicuuc.so');
  470. hlibICUi18n:=LoadLibrary('libicui18n.so');
  471. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  472. UnloadICU;
  473. exit;
  474. end;
  475. // Finding ICU version using known versions table
  476. for i:=High(ICUver) downto Low(ICUver) do begin
  477. s:='_' + ICUver[i];
  478. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  479. LibVer:=s;
  480. break;
  481. end;
  482. end;
  483. if LibVer = '' then begin
  484. // Finding unknown ICU version
  485. Val(ICUver[High(ICUver)], i);
  486. repeat
  487. Inc(i);
  488. Str(i, s);
  489. s:='_' + s;
  490. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  491. LibVer:=s;
  492. break;
  493. end;
  494. until i >= 100;
  495. end;
  496. if LibVer = '' then begin
  497. // Trying versionless name
  498. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  499. // Unable to get ICU version
  500. UnloadICU;
  501. exit;
  502. end;
  503. end;
  504. if not _GetProc('ucnv_open', ucnv_open) then exit;
  505. if not _GetProc('ucnv_close', ucnv_close) then exit;
  506. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  507. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  508. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  509. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  510. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  511. if not _GetProc('u_strToLower', u_strToLower) then exit;
  512. if not _GetProc('u_strCompare', u_strCompare) then exit;
  513. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  514. if not _GetProc('u_errorName', u_errorName) then exit;
  515. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  516. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  517. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  518. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  519. end;
  520. initialization
  521. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  522. DefaultUnicodeCodePage:=CP_UTF16;
  523. LoadICU;
  524. SetCWideStringManager;
  525. SetStdIOCodePages;
  526. finalization
  527. UnloadICU;
  528. end.