cwstring.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. threadvar
  46. ThreadDataInited: boolean;
  47. DefConv, LastConv: PUConverter;
  48. LastCP: TSystemCodePage;
  49. DefColl: PUCollator;
  50. function OpenConverter(const name: ansistring): PUConverter;
  51. var
  52. err: UErrorCode;
  53. begin
  54. err:=0;
  55. Result:=ucnv_open(PAnsiChar(name), err);
  56. if Result <> nil then begin
  57. ucnv_setSubstChars(Result, '?', 1, err);
  58. ucnv_setFallback(Result, True);
  59. end;
  60. end;
  61. procedure InitThreadData;
  62. var
  63. err: UErrorCode;
  64. col: PUCollator;
  65. begin
  66. if (hlibICU = 0) or ThreadDataInited then
  67. exit;
  68. ThreadDataInited:=True;
  69. DefConv:=OpenConverter('utf8');
  70. err:=0;
  71. col:=ucol_open(nil, err);
  72. if col <> nil then
  73. ucol_setStrength(col, 2);
  74. DefColl:=col;
  75. end;
  76. function GetConverter(cp: TSystemCodePage): PUConverter;
  77. var
  78. s: ansistring;
  79. begin
  80. if hlibICU = 0 then begin
  81. Result:=nil;
  82. exit;
  83. end;
  84. InitThreadData;
  85. if (cp = CP_UTF8) or (cp = CP_ACP) then
  86. Result:=DefConv
  87. else begin
  88. if cp <> LastCP then begin
  89. Str(cp, s);
  90. LastConv:=OpenConverter('cp' + s);
  91. LastCP:=cp;
  92. end;
  93. Result:=LastConv;
  94. end;
  95. end;
  96. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  97. var
  98. len2: SizeInt;
  99. conv: PUConverter;
  100. err: UErrorCode;
  101. begin
  102. if len = 0 then begin
  103. dest:='';
  104. exit;
  105. end;
  106. conv:=GetConverter(cp);
  107. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  108. // fallback implementation
  109. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  110. exit;
  111. end;
  112. len2:=len*3;
  113. SetLength(dest, len2);
  114. err:=0;
  115. if conv <> nil then
  116. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  117. else begin
  118. // Use UTF-8 conversion from RTL
  119. cp:=CP_UTF8;
  120. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  121. end;
  122. if len2 > Length(dest) then begin
  123. SetLength(dest, len2);
  124. err:=0;
  125. if conv <> nil then
  126. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
  127. else
  128. len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
  129. end;
  130. if len2 < 0 then
  131. len2:=0;
  132. SetLength(dest, len2);
  133. SetCodePage(dest, cp, False);
  134. end;
  135. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  136. var
  137. len2: SizeInt;
  138. conv: PUConverter;
  139. err: UErrorCode;
  140. begin
  141. if len = 0 then begin
  142. dest:='';
  143. exit;
  144. end;
  145. conv:=GetConverter(cp);
  146. if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
  147. // fallback implementation
  148. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  149. exit;
  150. end;
  151. len2:=len;
  152. SetLength(dest, len2);
  153. err:=0;
  154. if conv <> nil then
  155. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  156. else
  157. // Use UTF-8 conversion from RTL
  158. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  159. if len2 > Length(dest) then begin
  160. SetLength(dest, len2);
  161. err:=0;
  162. if conv <> nil then
  163. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
  164. else
  165. len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
  166. end;
  167. if len2 < 0 then
  168. len2:=0;
  169. SetLength(dest, len2);
  170. end;
  171. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  172. var
  173. len, len2: SizeInt;
  174. err: UErrorCode;
  175. begin
  176. if hlibICU = 0 then begin
  177. // fallback implementation
  178. Result:=UnicodeString(UpCase(AnsiString(s)));
  179. exit;
  180. end;
  181. len:=Length(s);
  182. SetLength(Result, len);
  183. if len = 0 then
  184. exit;
  185. err:=0;
  186. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  187. if len2 > len then begin
  188. SetLength(Result, len2);
  189. err:=0;
  190. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  191. end;
  192. SetLength(Result, len2);
  193. end;
  194. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  195. var
  196. len, len2: SizeInt;
  197. err: UErrorCode;
  198. begin
  199. if hlibICU = 0 then begin
  200. // fallback implementation
  201. Result:=UnicodeString(LowerCase(AnsiString(s)));
  202. exit;
  203. end;
  204. len:=Length(s);
  205. SetLength(Result, len);
  206. if len = 0 then
  207. exit;
  208. err:=0;
  209. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  210. if len2 > len then begin
  211. SetLength(Result, len2);
  212. err:=0;
  213. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  214. end;
  215. SetLength(Result, len2);
  216. end;
  217. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  218. var
  219. count, count1, count2: SizeInt;
  220. begin
  221. result := 0;
  222. Count1 := Length(S1);
  223. Count2 := Length(S2);
  224. if Count1>Count2 then
  225. Count:=Count2
  226. else
  227. Count:=Count1;
  228. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  229. if result=0 then
  230. result:=Count1 - Count2;
  231. end;
  232. function CompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  233. const
  234. U_COMPARE_CODE_POINT_ORDER = $8000;
  235. var
  236. err: UErrorCode;
  237. begin
  238. if hlibICU = 0 then begin
  239. // fallback implementation
  240. Result:=_CompareStr(s1, s2);
  241. exit;
  242. end;
  243. if (coIgnoreCase in Options) then begin
  244. err:=0;
  245. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  246. end
  247. else begin
  248. InitThreadData;
  249. if DefColl <> nil then
  250. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  251. else
  252. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  253. end;
  254. end;
  255. function UpperAnsiString(const s : AnsiString) : AnsiString;
  256. begin
  257. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  258. end;
  259. function LowerAnsiString(const s : AnsiString) : AnsiString;
  260. begin
  261. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  262. end;
  263. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  264. begin
  265. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
  266. end;
  267. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  268. begin
  269. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
  270. end;
  271. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  272. begin
  273. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
  274. end;
  275. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  276. begin
  277. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
  278. end;
  279. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  280. var
  281. as1, as2: ansistring;
  282. begin
  283. SetString(as1, S1, MaxLen);
  284. SetString(as2, S2, MaxLen);
  285. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), []);
  286. end;
  287. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  288. var
  289. as1, as2: ansistring;
  290. begin
  291. SetString(as1, S1, MaxLen);
  292. SetString(as2, S2, MaxLen);
  293. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), [coIgnoreCase]);
  294. end;
  295. function AnsiStrLower(Str: PChar): PChar;
  296. var
  297. s, res: ansistring;
  298. begin
  299. s:=Str;
  300. res:=LowerAnsiString(s);
  301. if Length(res) > Length(s) then
  302. SetLength(res, Length(s));
  303. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  304. Result:=Str;
  305. end;
  306. function AnsiStrUpper(Str: PChar): PChar;
  307. var
  308. s, res: ansistring;
  309. begin
  310. s:=Str;
  311. res:=UpperAnsiString(s);
  312. if Length(res) > Length(s) then
  313. SetLength(res, Length(s));
  314. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  315. Result:=Str;
  316. end;
  317. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  318. var
  319. c: byte;
  320. begin
  321. // Only UTF-8 encoding is supported
  322. c:=byte(Str^);
  323. if c = 0 then
  324. Result:=0
  325. else begin
  326. Result:=1;
  327. if c < $80 then
  328. exit; // 1-byte ASCII char
  329. while c and $C0 = $C0 do begin
  330. Inc(Result);
  331. c:=c shl 1;
  332. end;
  333. if Result > 6 then
  334. Result:=1 // Invalid code point
  335. else
  336. if Result > MaxLookAead then
  337. Result:=-1; // Incomplete code point
  338. end;
  339. end;
  340. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  341. begin
  342. Result := CP_UTF8; // Android always uses UTF-8
  343. end;
  344. procedure SetStdIOCodePage(var T: Text); inline;
  345. begin
  346. case TextRec(T).Mode of
  347. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  348. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  349. end;
  350. end;
  351. procedure SetStdIOCodePages; inline;
  352. begin
  353. SetStdIOCodePage(Input);
  354. SetStdIOCodePage(Output);
  355. SetStdIOCodePage(ErrOutput);
  356. SetStdIOCodePage(StdOut);
  357. SetStdIOCodePage(StdErr);
  358. end;
  359. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  360. var
  361. us: UnicodeString;
  362. begin
  363. Ansi2UnicodeMove(source,cp,us,len);
  364. dest:=us;
  365. end;
  366. function UpperWideString(const s : WideString) : WideString;
  367. begin
  368. Result:=UpperUnicodeString(s);
  369. end;
  370. function LowerWideString(const s : WideString) : WideString;
  371. begin
  372. Result:=LowerUnicodeString(s);
  373. end;
  374. function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  375. begin
  376. Result:=CompareUnicodeString(s1, s2, Options);
  377. end;
  378. Procedure SetCWideStringManager;
  379. Var
  380. CWideStringManager : TUnicodeStringManager;
  381. begin
  382. CWideStringManager:=widestringmanager;
  383. With CWideStringManager do
  384. begin
  385. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  386. Ansi2WideMoveProc:=@Ansi2WideMove;
  387. UpperWideStringProc:=@UpperWideString;
  388. LowerWideStringProc:=@LowerWideString;
  389. CompareWideStringProc:=@CompareWideString;
  390. UpperAnsiStringProc:=@UpperAnsiString;
  391. LowerAnsiStringProc:=@LowerAnsiString;
  392. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  393. CompareTextAnsiStringProc:=@AnsiCompareText;
  394. StrCompAnsiStringProc:=@StrCompAnsi;
  395. StrICompAnsiStringProc:=@AnsiStrIComp;
  396. StrLCompAnsiStringProc:=@AnsiStrLComp;
  397. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  398. StrLowerAnsiStringProc:=@AnsiStrLower;
  399. StrUpperAnsiStringProc:=@AnsiStrUpper;
  400. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  401. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  402. UpperUnicodeStringProc:=@UpperUnicodeString;
  403. LowerUnicodeStringProc:=@LowerUnicodeString;
  404. CompareUnicodeStringProc:=@CompareUnicodeString;
  405. GetStandardCodePageProc:=@GetStandardCodePage;
  406. CodePointLengthProc:=@CodePointLength;
  407. end;
  408. SetUnicodeStringManager(CWideStringManager);
  409. end;
  410. procedure UnloadICU;
  411. begin
  412. if hlibICUi18n <> 0 then begin
  413. if DefColl <> nil then
  414. ucol_close(DefColl);
  415. UnloadLibrary(hlibICUi18n);
  416. hlibICUi18n:=0;
  417. end;
  418. if hlibICU <> 0 then begin
  419. if DefConv <> nil then
  420. ucnv_close(DefConv);
  421. if LastConv <> nil then
  422. ucnv_close(LastConv);
  423. UnloadLibrary(hlibICU);
  424. hlibICU:=0;
  425. end;
  426. end;
  427. procedure LoadICU;
  428. var
  429. LibVer: ansistring;
  430. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  431. var
  432. p: pointer;
  433. begin
  434. if hLib = 0 then
  435. hLib:=hlibICU;
  436. p:=GetProcedureAddress(hlib, Name + LibVer);
  437. if p = nil then begin
  438. // unload lib on failure
  439. UnloadICU;
  440. Result:=False;
  441. end
  442. else begin
  443. pointer(ProcPtr):=p;
  444. Result:=True;
  445. end;
  446. end;
  447. const
  448. ICUver: array [1..9] of ansistring = ('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55');
  449. TestProcName = 'ucnv_open';
  450. var
  451. i: longint;
  452. s: ansistring;
  453. begin
  454. hlibICU:=LoadLibrary('libicuuc.so');
  455. hlibICUi18n:=LoadLibrary('libicui18n.so');
  456. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  457. UnloadICU;
  458. exit;
  459. end;
  460. // Finding ICU version using known versions table
  461. for i:=High(ICUver) downto Low(ICUver) do begin
  462. s:='_' + ICUver[i];
  463. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  464. LibVer:=s;
  465. break;
  466. end;
  467. end;
  468. if LibVer = '' then begin
  469. // Finding unknown ICU version
  470. Val(ICUver[High(ICUver)], i);
  471. repeat
  472. Inc(i);
  473. Str(i, s);
  474. s:='_' + s;
  475. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  476. LibVer:=s;
  477. break;
  478. end;
  479. until i >= 100;
  480. end;
  481. if LibVer = '' then begin
  482. // Trying versionless name
  483. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  484. // Unable to get ICU version
  485. UnloadICU;
  486. exit;
  487. end;
  488. end;
  489. if not _GetProc('ucnv_open', ucnv_open) then exit;
  490. if not _GetProc('ucnv_close', ucnv_close) then exit;
  491. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  492. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  493. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  494. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  495. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  496. if not _GetProc('u_strToLower', u_strToLower) then exit;
  497. if not _GetProc('u_strCompare', u_strCompare) then exit;
  498. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  499. if not _GetProc('u_errorName', u_errorName) then exit;
  500. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  501. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  502. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  503. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  504. end;
  505. initialization
  506. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  507. DefaultUnicodeCodePage:=CP_UTF16;
  508. LoadICU;
  509. SetCWideStringManager;
  510. SetStdIOCodePages;
  511. finalization
  512. UnloadICU;
  513. end.