cwstring.pp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. threadvar
  46. ThreadDataInited: boolean;
  47. DefConv, LastConv: PUConverter;
  48. LastCP: TSystemCodePage;
  49. DefColl: PUCollator;
  50. function OpenConverter(const name: ansistring): PUConverter;
  51. var
  52. err: UErrorCode;
  53. begin
  54. err:=0;
  55. Result:=ucnv_open(PAnsiChar(name), err);
  56. if Result <> nil then begin
  57. ucnv_setSubstChars(Result, '?', 1, err);
  58. ucnv_setFallback(Result, True);
  59. end;
  60. end;
  61. procedure InitThreadData;
  62. var
  63. err: UErrorCode;
  64. col: PUCollator;
  65. begin
  66. if (hlibICU = 0) or ThreadDataInited then
  67. exit;
  68. ThreadDataInited:=True;
  69. DefConv:=OpenConverter('utf8');
  70. err:=0;
  71. col:=ucol_open(nil, err);
  72. if col <> nil then
  73. ucol_setStrength(col, 2);
  74. DefColl:=col;
  75. end;
  76. function GetConverter(cp: TSystemCodePage): PUConverter;
  77. var
  78. s: ansistring;
  79. begin
  80. if hlibICU = 0 then begin
  81. Result:=nil;
  82. exit;
  83. end;
  84. InitThreadData;
  85. if (cp = DefaultSystemCodePage) or (cp = CP_ACP) then
  86. Result:=DefConv
  87. else begin
  88. if cp <> LastCP then begin
  89. Str(cp, s);
  90. LastConv:=OpenConverter('cp' + s);
  91. LastCP:=cp;
  92. end;
  93. Result:=LastConv;
  94. end;
  95. end;
  96. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  97. var
  98. len2: SizeInt;
  99. conv: PUConverter;
  100. err: UErrorCode;
  101. begin
  102. if len = 0 then begin
  103. dest:='';
  104. exit;
  105. end;
  106. conv:=GetConverter(cp);
  107. if conv = nil then begin
  108. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  109. exit;
  110. end;
  111. len2:=len*3;
  112. SetLength(dest, len2);
  113. err:=0;
  114. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  115. if len2 > Length(dest) then begin
  116. SetLength(dest, len2);
  117. err:=0;
  118. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  119. end;
  120. SetLength(dest, len2);
  121. SetCodePage(dest, cp, False);
  122. end;
  123. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  124. var
  125. len2: SizeInt;
  126. conv: PUConverter;
  127. err: UErrorCode;
  128. begin
  129. if len = 0 then begin
  130. dest:='';
  131. exit;
  132. end;
  133. conv:=GetConverter(cp);
  134. if conv = nil then begin
  135. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  136. exit;
  137. end;
  138. len2:=len;
  139. SetLength(dest, len2);
  140. err:=0;
  141. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  142. if len2 > Length(dest) then begin
  143. SetLength(dest, len2);
  144. err:=0;
  145. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  146. end;
  147. SetLength(dest, len2);
  148. end;
  149. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  150. var
  151. len, len2: SizeInt;
  152. err: UErrorCode;
  153. begin
  154. if hlibICU = 0 then begin
  155. // fallback implementation
  156. Result:=UnicodeString(UpCase(AnsiString(s)));
  157. exit;
  158. end;
  159. len:=Length(s);
  160. SetLength(Result, len);
  161. if len = 0 then
  162. exit;
  163. err:=0;
  164. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  165. if len2 > len then begin
  166. SetLength(Result, len2);
  167. err:=0;
  168. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  169. end;
  170. SetLength(Result, len2);
  171. end;
  172. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  173. var
  174. len, len2: SizeInt;
  175. err: UErrorCode;
  176. begin
  177. if hlibICU = 0 then begin
  178. // fallback implementation
  179. Result:=UnicodeString(LowerCase(AnsiString(s)));
  180. exit;
  181. end;
  182. len:=Length(s);
  183. SetLength(Result, len);
  184. if len = 0 then
  185. exit;
  186. err:=0;
  187. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  188. if len2 > len then begin
  189. SetLength(Result, len2);
  190. err:=0;
  191. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  192. end;
  193. SetLength(Result, len2);
  194. end;
  195. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  196. var
  197. count, count1, count2: SizeInt;
  198. begin
  199. result := 0;
  200. Count1 := Length(S1);
  201. Count2 := Length(S2);
  202. if Count1>Count2 then
  203. Count:=Count2
  204. else
  205. Count:=Count1;
  206. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  207. if result=0 then
  208. result:=Count1 - Count2;
  209. end;
  210. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  211. begin
  212. if hlibICU = 0 then begin
  213. // fallback implementation
  214. Result:=_CompareStr(s1, s2);
  215. exit;
  216. end;
  217. InitThreadData;
  218. if DefColl <> nil then
  219. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  220. else
  221. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  222. end;
  223. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  224. const
  225. U_COMPARE_CODE_POINT_ORDER = $8000;
  226. var
  227. err: UErrorCode;
  228. begin
  229. if hlibICU = 0 then begin
  230. // fallback implementation
  231. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  232. exit;
  233. end;
  234. err:=0;
  235. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  236. end;
  237. function UpperAnsiString(const s : AnsiString) : AnsiString;
  238. begin
  239. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  240. end;
  241. function LowerAnsiString(const s : AnsiString) : AnsiString;
  242. begin
  243. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  244. end;
  245. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  246. begin
  247. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  248. end;
  249. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  250. begin
  251. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  252. end;
  253. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  254. begin
  255. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  256. end;
  257. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  258. begin
  259. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  260. end;
  261. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  262. var
  263. as1, as2: ansistring;
  264. begin
  265. SetString(as1, S1, MaxLen);
  266. SetString(as2, S2, MaxLen);
  267. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2));
  268. end;
  269. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  270. var
  271. as1, as2: ansistring;
  272. begin
  273. SetString(as1, S1, MaxLen);
  274. SetString(as2, S2, MaxLen);
  275. Result:=CompareTextUnicodeString(UnicodeString(as1), UnicodeString(as2));
  276. end;
  277. function AnsiStrLower(Str: PChar): PChar;
  278. var
  279. s, res: ansistring;
  280. begin
  281. s:=Str;
  282. res:=LowerAnsiString(s);
  283. if Length(res) > Length(s) then
  284. SetLength(res, Length(s));
  285. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  286. Result:=Str;
  287. end;
  288. function AnsiStrUpper(Str: PChar): PChar;
  289. var
  290. s, res: ansistring;
  291. begin
  292. s:=Str;
  293. res:=UpperAnsiString(s);
  294. if Length(res) > Length(s) then
  295. SetLength(res, Length(s));
  296. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  297. Result:=Str;
  298. end;
  299. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  300. var
  301. c: byte;
  302. begin
  303. // Only UTF-8 encoding is supported
  304. c:=byte(Str^);
  305. if c = 0 then
  306. Result:=0
  307. else begin
  308. Result:=1;
  309. if c < $80 then
  310. exit; // 1-byte ASCII char
  311. while c and $C0 = $C0 do begin
  312. Inc(Result);
  313. c:=c shl 1;
  314. end;
  315. if Result > 6 then
  316. Result:=1 // Invalid code point
  317. else
  318. if Result > MaxLookAead then
  319. Result:=-1; // Incomplete code point
  320. end;
  321. end;
  322. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  323. begin
  324. Result := CP_UTF8; // Android always uses UTF-8
  325. end;
  326. {$i textrec.inc}
  327. procedure SetStdIOCodePage(var T: Text); inline;
  328. begin
  329. case TextRec(T).Mode of
  330. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  331. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  332. end;
  333. end;
  334. procedure SetStdIOCodePages; inline;
  335. begin
  336. SetStdIOCodePage(Input);
  337. SetStdIOCodePage(Output);
  338. SetStdIOCodePage(ErrOutput);
  339. SetStdIOCodePage(StdOut);
  340. SetStdIOCodePage(StdErr);
  341. end;
  342. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  343. var
  344. us: UnicodeString;
  345. begin
  346. Ansi2UnicodeMove(source,cp,us,len);
  347. dest:=us;
  348. end;
  349. function UpperWideString(const s : WideString) : WideString;
  350. begin
  351. Result:=UpperUnicodeString(s);
  352. end;
  353. function LowerWideString(const s : WideString) : WideString;
  354. begin
  355. Result:=LowerUnicodeString(s);
  356. end;
  357. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  358. begin
  359. Result:=CompareUnicodeString(s1, s2);
  360. end;
  361. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  362. begin
  363. Result:=CompareTextUnicodeString(s1, s2);
  364. end;
  365. Procedure SetCWideStringManager;
  366. Var
  367. CWideStringManager : TUnicodeStringManager;
  368. begin
  369. CWideStringManager:=widestringmanager;
  370. With CWideStringManager do
  371. begin
  372. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  373. Ansi2WideMoveProc:=@Ansi2WideMove;
  374. UpperWideStringProc:=@UpperWideString;
  375. LowerWideStringProc:=@LowerWideString;
  376. CompareWideStringProc:=@CompareWideString;
  377. CompareTextWideStringProc:=@CompareTextWideString;
  378. UpperAnsiStringProc:=@UpperAnsiString;
  379. LowerAnsiStringProc:=@LowerAnsiString;
  380. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  381. CompareTextAnsiStringProc:=@AnsiCompareText;
  382. StrCompAnsiStringProc:=@StrCompAnsi;
  383. StrICompAnsiStringProc:=@AnsiStrIComp;
  384. StrLCompAnsiStringProc:=@AnsiStrLComp;
  385. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  386. StrLowerAnsiStringProc:=@AnsiStrLower;
  387. StrUpperAnsiStringProc:=@AnsiStrUpper;
  388. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  389. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  390. UpperUnicodeStringProc:=@UpperUnicodeString;
  391. LowerUnicodeStringProc:=@LowerUnicodeString;
  392. CompareUnicodeStringProc:=@CompareUnicodeString;
  393. CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  394. GetStandardCodePageProc:=@GetStandardCodePage;
  395. CodePointLengthProc:=@CodePointLength;
  396. end;
  397. SetUnicodeStringManager(CWideStringManager);
  398. end;
  399. procedure UnloadICU;
  400. begin
  401. if hlibICUi18n <> 0 then begin
  402. if DefColl <> nil then
  403. ucol_close(DefColl);
  404. UnloadLibrary(hlibICUi18n);
  405. hlibICUi18n:=0;
  406. end;
  407. if hlibICU <> 0 then begin
  408. if DefConv <> nil then
  409. ucnv_close(DefConv);
  410. if LastConv <> nil then
  411. ucnv_close(LastConv);
  412. UnloadLibrary(hlibICU);
  413. hlibICU:=0;
  414. end;
  415. end;
  416. procedure LoadICU;
  417. var
  418. LibVer: ansistring;
  419. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  420. var
  421. p: pointer;
  422. begin
  423. if hLib = 0 then
  424. hLib:=hlibICU;
  425. p:=GetProcedureAddress(hlib, Name + LibVer);
  426. if p = nil then begin
  427. // unload lib on failure
  428. UnloadICU;
  429. Result:=False;
  430. end
  431. else begin
  432. pointer(ProcPtr):=p;
  433. Result:=True;
  434. end;
  435. end;
  436. const
  437. ICUver: array [1..5] of ansistring = ('3_8', '4_2', '44', '46', '48');
  438. TestProcName = 'ucnv_open';
  439. var
  440. i: longint;
  441. s: ansistring;
  442. begin
  443. hlibICU:=LoadLibrary('libicuuc.so');
  444. hlibICUi18n:=LoadLibrary('libicui18n.so');
  445. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  446. UnloadICU;
  447. exit;
  448. end;
  449. // Finding ICU version using known versions table
  450. for i:=High(ICUver) downto Low(ICUver) do begin
  451. s:='_' + ICUver[i];
  452. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  453. LibVer:=s;
  454. break;
  455. end;
  456. end;
  457. if LibVer = '' then begin
  458. // Finding unknown ICU version
  459. Val(ICUver[High(ICUver)], i);
  460. repeat
  461. Inc(i, 2);
  462. Str(i, s);
  463. s:='_' + s;
  464. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  465. LibVer:=s;
  466. break;
  467. end;
  468. until i >= 100;
  469. end;
  470. if LibVer = '' then begin
  471. // Trying versionless name
  472. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  473. // Unable to get ICU version
  474. UnloadICU;
  475. exit;
  476. end;
  477. end;
  478. if not _GetProc('ucnv_open', ucnv_open) then exit;
  479. if not _GetProc('ucnv_close', ucnv_close) then exit;
  480. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  481. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  482. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  483. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  484. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  485. if not _GetProc('u_strToLower', u_strToLower) then exit;
  486. if not _GetProc('u_strCompare', u_strCompare) then exit;
  487. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  488. if not _GetProc('u_errorName', u_errorName) then exit;
  489. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  490. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  491. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  492. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  493. end;
  494. initialization
  495. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  496. DefaultUnicodeCodePage:=CP_UTF16;
  497. LoadICU;
  498. SetCWideStringManager;
  499. SetStdIOCodePages;
  500. finalization
  501. UnloadICU;
  502. end.