cwstring.pp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. threadvar
  46. ThreadDataInited: boolean;
  47. DefConv, LastConv: PUConverter;
  48. LastCP: TSystemCodePage;
  49. DefColl: PUCollator;
  50. function OpenConverter(const name: ansistring): PUConverter;
  51. var
  52. err: UErrorCode;
  53. begin
  54. err:=0;
  55. Result:=ucnv_open(PAnsiChar(name), err);
  56. if Result <> nil then begin
  57. ucnv_setSubstChars(Result, '?', 1, err);
  58. ucnv_setFallback(Result, True);
  59. end;
  60. end;
  61. procedure InitThreadData;
  62. var
  63. err: UErrorCode;
  64. col: PUCollator;
  65. begin
  66. if (hlibICU = 0) or ThreadDataInited then
  67. exit;
  68. ThreadDataInited:=True;
  69. DefConv:=OpenConverter('utf8');
  70. err:=0;
  71. col:=ucol_open(nil, err);
  72. if col <> nil then
  73. ucol_setStrength(col, 2);
  74. DefColl:=col;
  75. end;
  76. function GetConverter(cp: TSystemCodePage): PUConverter;
  77. var
  78. s: ansistring;
  79. begin
  80. if hlibICU = 0 then begin
  81. Result:=nil;
  82. exit;
  83. end;
  84. InitThreadData;
  85. if (cp = DefaultSystemCodePage) or (cp = CP_ACP) then
  86. Result:=DefConv
  87. else begin
  88. if cp <> LastCP then begin
  89. Str(cp, s);
  90. LastConv:=OpenConverter('cp' + s);
  91. LastCP:=cp;
  92. end;
  93. Result:=LastConv;
  94. end;
  95. end;
  96. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  97. var
  98. len2: SizeInt;
  99. conv: PUConverter;
  100. err: UErrorCode;
  101. begin
  102. if len = 0 then begin
  103. dest:='';
  104. exit;
  105. end;
  106. conv:=GetConverter(cp);
  107. if conv = nil then begin
  108. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  109. exit;
  110. end;
  111. len2:=len*3;
  112. SetLength(dest, len2);
  113. err:=0;
  114. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  115. if len2 > Length(dest) then begin
  116. SetLength(dest, len2);
  117. err:=0;
  118. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  119. end;
  120. SetLength(dest, len2);
  121. SetCodePage(dest, cp, False);
  122. end;
  123. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  124. var
  125. len2: SizeInt;
  126. conv: PUConverter;
  127. err: UErrorCode;
  128. begin
  129. if len = 0 then begin
  130. dest:='';
  131. exit;
  132. end;
  133. conv:=GetConverter(cp);
  134. if conv = nil then begin
  135. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  136. exit;
  137. end;
  138. len2:=len;
  139. SetLength(dest, len2);
  140. err:=0;
  141. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  142. if len2 > Length(dest) then begin
  143. SetLength(dest, len2);
  144. err:=0;
  145. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  146. end;
  147. SetLength(dest, len2);
  148. end;
  149. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  150. var
  151. len, len2: SizeInt;
  152. err: UErrorCode;
  153. begin
  154. if hlibICU = 0 then begin
  155. // fallback implementation
  156. Result:=UnicodeString(UpCase(AnsiString(s)));
  157. exit;
  158. end;
  159. len:=Length(s);
  160. SetLength(Result, len);
  161. if len = 0 then
  162. exit;
  163. err:=0;
  164. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  165. if len2 > len then begin
  166. SetLength(Result, len2);
  167. err:=0;
  168. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  169. end;
  170. SetLength(Result, len2);
  171. end;
  172. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  173. var
  174. len, len2: SizeInt;
  175. err: UErrorCode;
  176. begin
  177. if hlibICU = 0 then begin
  178. // fallback implementation
  179. Result:=UnicodeString(LowerCase(AnsiString(s)));
  180. exit;
  181. end;
  182. len:=Length(s);
  183. SetLength(Result, len);
  184. if len = 0 then
  185. exit;
  186. err:=0;
  187. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  188. if len2 > len then begin
  189. SetLength(Result, len2);
  190. err:=0;
  191. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  192. end;
  193. SetLength(Result, len2);
  194. end;
  195. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  196. var
  197. count, count1, count2: SizeInt;
  198. begin
  199. result := 0;
  200. Count1 := Length(S1);
  201. Count2 := Length(S2);
  202. if Count1>Count2 then
  203. Count:=Count2
  204. else
  205. Count:=Count1;
  206. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  207. if result=0 then
  208. result:=Count1 - Count2;
  209. end;
  210. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  211. begin
  212. if hlibICU = 0 then begin
  213. // fallback implementation
  214. Result:=_CompareStr(s1, s2);
  215. exit;
  216. end;
  217. InitThreadData;
  218. if DefColl <> nil then
  219. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  220. else
  221. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  222. end;
  223. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  224. const
  225. U_COMPARE_CODE_POINT_ORDER = $8000;
  226. var
  227. err: UErrorCode;
  228. begin
  229. if hlibICU = 0 then begin
  230. // fallback implementation
  231. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  232. exit;
  233. end;
  234. err:=0;
  235. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  236. end;
  237. function UpperAnsiString(const s : AnsiString) : AnsiString;
  238. begin
  239. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  240. end;
  241. function LowerAnsiString(const s : AnsiString) : AnsiString;
  242. begin
  243. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  244. end;
  245. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  246. begin
  247. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  248. end;
  249. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  250. begin
  251. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  252. end;
  253. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  254. begin
  255. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  256. end;
  257. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  258. begin
  259. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  260. end;
  261. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  262. var
  263. as1, as2: ansistring;
  264. begin
  265. SetString(as1, S1, MaxLen);
  266. SetString(as2, S2, MaxLen);
  267. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2));
  268. end;
  269. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  270. var
  271. as1, as2: ansistring;
  272. begin
  273. SetString(as1, S1, MaxLen);
  274. SetString(as2, S2, MaxLen);
  275. Result:=CompareTextUnicodeString(UnicodeString(as1), UnicodeString(as2));
  276. end;
  277. function AnsiStrLower(Str: PChar): PChar;
  278. var
  279. s, res: ansistring;
  280. begin
  281. s:=Str;
  282. res:=LowerAnsiString(s);
  283. if Length(res) > Length(s) then
  284. SetLength(res, Length(s));
  285. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  286. Result:=Str;
  287. end;
  288. function AnsiStrUpper(Str: PChar): PChar;
  289. var
  290. s, res: ansistring;
  291. begin
  292. s:=Str;
  293. res:=UpperAnsiString(s);
  294. if Length(res) > Length(s) then
  295. SetLength(res, Length(s));
  296. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  297. Result:=Str;
  298. end;
  299. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  300. var
  301. c: byte;
  302. begin
  303. // Only UTF-8 encoding is supported
  304. c:=byte(Str^);
  305. if c = 0 then
  306. Result:=0
  307. else begin
  308. Result:=1;
  309. if c < $80 then
  310. exit; // 1-byte ASCII char
  311. while c and $C0 = $C0 do begin
  312. Inc(Result);
  313. c:=c shl 1;
  314. end;
  315. if Result > 6 then
  316. Result:=1 // Invalid code point
  317. else
  318. if Result > MaxLookAead then
  319. Result:=-1; // Incomplete code point
  320. end;
  321. end;
  322. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  323. begin
  324. Result := CP_UTF8; // Android always uses UTF-8
  325. end;
  326. procedure SetStdIOCodePage(var T: Text); inline;
  327. begin
  328. case TextRec(T).Mode of
  329. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  330. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  331. end;
  332. end;
  333. procedure SetStdIOCodePages; inline;
  334. begin
  335. SetStdIOCodePage(Input);
  336. SetStdIOCodePage(Output);
  337. SetStdIOCodePage(ErrOutput);
  338. SetStdIOCodePage(StdOut);
  339. SetStdIOCodePage(StdErr);
  340. end;
  341. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  342. var
  343. us: UnicodeString;
  344. begin
  345. Ansi2UnicodeMove(source,cp,us,len);
  346. dest:=us;
  347. end;
  348. function UpperWideString(const s : WideString) : WideString;
  349. begin
  350. Result:=UpperUnicodeString(s);
  351. end;
  352. function LowerWideString(const s : WideString) : WideString;
  353. begin
  354. Result:=LowerUnicodeString(s);
  355. end;
  356. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  357. begin
  358. Result:=CompareUnicodeString(s1, s2);
  359. end;
  360. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  361. begin
  362. Result:=CompareTextUnicodeString(s1, s2);
  363. end;
  364. Procedure SetCWideStringManager;
  365. Var
  366. CWideStringManager : TUnicodeStringManager;
  367. begin
  368. CWideStringManager:=widestringmanager;
  369. With CWideStringManager do
  370. begin
  371. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  372. Ansi2WideMoveProc:=@Ansi2WideMove;
  373. UpperWideStringProc:=@UpperWideString;
  374. LowerWideStringProc:=@LowerWideString;
  375. CompareWideStringProc:=@CompareWideString;
  376. CompareTextWideStringProc:=@CompareTextWideString;
  377. UpperAnsiStringProc:=@UpperAnsiString;
  378. LowerAnsiStringProc:=@LowerAnsiString;
  379. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  380. CompareTextAnsiStringProc:=@AnsiCompareText;
  381. StrCompAnsiStringProc:=@StrCompAnsi;
  382. StrICompAnsiStringProc:=@AnsiStrIComp;
  383. StrLCompAnsiStringProc:=@AnsiStrLComp;
  384. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  385. StrLowerAnsiStringProc:=@AnsiStrLower;
  386. StrUpperAnsiStringProc:=@AnsiStrUpper;
  387. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  388. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  389. UpperUnicodeStringProc:=@UpperUnicodeString;
  390. LowerUnicodeStringProc:=@LowerUnicodeString;
  391. CompareUnicodeStringProc:=@CompareUnicodeString;
  392. CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  393. GetStandardCodePageProc:=@GetStandardCodePage;
  394. CodePointLengthProc:=@CodePointLength;
  395. end;
  396. SetUnicodeStringManager(CWideStringManager);
  397. end;
  398. procedure UnloadICU;
  399. begin
  400. if hlibICUi18n <> 0 then begin
  401. if DefColl <> nil then
  402. ucol_close(DefColl);
  403. UnloadLibrary(hlibICUi18n);
  404. hlibICUi18n:=0;
  405. end;
  406. if hlibICU <> 0 then begin
  407. if DefConv <> nil then
  408. ucnv_close(DefConv);
  409. if LastConv <> nil then
  410. ucnv_close(LastConv);
  411. UnloadLibrary(hlibICU);
  412. hlibICU:=0;
  413. end;
  414. end;
  415. procedure LoadICU;
  416. var
  417. LibVer: ansistring;
  418. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  419. var
  420. p: pointer;
  421. begin
  422. if hLib = 0 then
  423. hLib:=hlibICU;
  424. p:=GetProcedureAddress(hlib, Name + LibVer);
  425. if p = nil then begin
  426. // unload lib on failure
  427. UnloadICU;
  428. Result:=False;
  429. end
  430. else begin
  431. pointer(ProcPtr):=p;
  432. Result:=True;
  433. end;
  434. end;
  435. const
  436. ICUver: array [1..5] of ansistring = ('3_8', '4_2', '44', '46', '48');
  437. TestProcName = 'ucnv_open';
  438. var
  439. i: longint;
  440. s: ansistring;
  441. begin
  442. hlibICU:=LoadLibrary('libicuuc.so');
  443. hlibICUi18n:=LoadLibrary('libicui18n.so');
  444. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  445. UnloadICU;
  446. exit;
  447. end;
  448. // Finding ICU version using known versions table
  449. for i:=High(ICUver) downto Low(ICUver) do begin
  450. s:='_' + ICUver[i];
  451. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  452. LibVer:=s;
  453. break;
  454. end;
  455. end;
  456. if LibVer = '' then begin
  457. // Finding unknown ICU version
  458. Val(ICUver[High(ICUver)], i);
  459. repeat
  460. Inc(i, 2);
  461. Str(i, s);
  462. s:='_' + s;
  463. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  464. LibVer:=s;
  465. break;
  466. end;
  467. until i >= 100;
  468. end;
  469. if LibVer = '' then begin
  470. // Trying versionless name
  471. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  472. // Unable to get ICU version
  473. UnloadICU;
  474. exit;
  475. end;
  476. end;
  477. if not _GetProc('ucnv_open', ucnv_open) then exit;
  478. if not _GetProc('ucnv_close', ucnv_close) then exit;
  479. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  480. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  481. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  482. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  483. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  484. if not _GetProc('u_strToLower', u_strToLower) then exit;
  485. if not _GetProc('u_strCompare', u_strCompare) then exit;
  486. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  487. if not _GetProc('u_errorName', u_errorName) then exit;
  488. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  489. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  490. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  491. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  492. end;
  493. initialization
  494. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  495. DefaultUnicodeCodePage:=CP_UTF16;
  496. LoadICU;
  497. SetCWideStringManager;
  498. SetStdIOCodePages;
  499. finalization
  500. UnloadICU;
  501. end.