cwstring.pp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. DefConv, LastConv: PUConverter;
  46. LastCP: TSystemCodePage;
  47. DefColl: PUCollator;
  48. function OpenConverter(const name: ansistring): PUConverter;
  49. var
  50. err: UErrorCode;
  51. begin
  52. err:=0;
  53. Result:=ucnv_open(PAnsiChar(name), err);
  54. if Result <> nil then begin
  55. ucnv_setSubstChars(Result, '?', 1, err);
  56. ucnv_setFallback(Result, True);
  57. end;
  58. end;
  59. function GetConverter(cp: TSystemCodePage): PUConverter;
  60. var
  61. s: ansistring;
  62. begin
  63. if hlibICU = 0 then begin
  64. Result:=nil;
  65. exit;
  66. end;
  67. if (cp = DefaultSystemCodePage) or (cp = CP_ACP) then
  68. Result:=DefConv
  69. else begin
  70. if cp <> LastCP then begin
  71. Str(cp, s);
  72. LastConv:=OpenConverter('cp' + s);
  73. LastCP:=cp;
  74. end;
  75. Result:=LastConv;
  76. end;
  77. end;
  78. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  79. var
  80. len2: SizeInt;
  81. conv: PUConverter;
  82. err: UErrorCode;
  83. begin
  84. if len = 0 then begin
  85. dest:='';
  86. exit;
  87. end;
  88. conv:=GetConverter(cp);
  89. if conv = nil then begin
  90. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  91. exit;
  92. end;
  93. len2:=len*3;
  94. SetLength(dest, len2);
  95. err:=0;
  96. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  97. if len2 > Length(dest) then begin
  98. SetLength(dest, len2);
  99. err:=0;
  100. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  101. end;
  102. SetLength(dest, len2);
  103. SetCodePage(dest, cp, False);
  104. end;
  105. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  106. var
  107. len2: SizeInt;
  108. conv: PUConverter;
  109. err: UErrorCode;
  110. begin
  111. if len = 0 then begin
  112. dest:='';
  113. exit;
  114. end;
  115. conv:=GetConverter(cp);
  116. if conv = nil then begin
  117. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  118. exit;
  119. end;
  120. len2:=len;
  121. SetLength(dest, len2);
  122. err:=0;
  123. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  124. if len2 > Length(dest) then begin
  125. SetLength(dest, len2);
  126. err:=0;
  127. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  128. end;
  129. SetLength(dest, len2);
  130. end;
  131. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  132. var
  133. len, len2: SizeInt;
  134. err: UErrorCode;
  135. begin
  136. if hlibICU = 0 then begin
  137. // fallback implementation
  138. Result:=UnicodeString(UpCase(AnsiString(s)));
  139. exit;
  140. end;
  141. len:=Length(s);
  142. SetLength(Result, len);
  143. if len = 0 then
  144. exit;
  145. err:=0;
  146. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  147. if len2 > len then begin
  148. SetLength(Result, len2);
  149. err:=0;
  150. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  151. end;
  152. SetLength(Result, len2);
  153. end;
  154. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  155. var
  156. len, len2: SizeInt;
  157. err: UErrorCode;
  158. begin
  159. if hlibICU = 0 then begin
  160. // fallback implementation
  161. Result:=UnicodeString(LowerCase(AnsiString(s)));
  162. exit;
  163. end;
  164. len:=Length(s);
  165. SetLength(Result, len);
  166. if len = 0 then
  167. exit;
  168. err:=0;
  169. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  170. if len2 > len then begin
  171. SetLength(Result, len2);
  172. err:=0;
  173. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  174. end;
  175. SetLength(Result, len2);
  176. end;
  177. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  178. var
  179. count, count1, count2: SizeInt;
  180. begin
  181. result := 0;
  182. Count1 := Length(S1);
  183. Count2 := Length(S2);
  184. if Count1>Count2 then
  185. Count:=Count2
  186. else
  187. Count:=Count1;
  188. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  189. if result=0 then
  190. result:=Count1 - Count2;
  191. end;
  192. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  193. begin
  194. if hlibICU = 0 then begin
  195. // fallback implementation
  196. Result:=_CompareStr(s1, s2);
  197. exit;
  198. end;
  199. if DefColl <> nil then
  200. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  201. else
  202. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  203. end;
  204. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  205. const
  206. U_COMPARE_CODE_POINT_ORDER = $8000;
  207. var
  208. err: UErrorCode;
  209. begin
  210. if hlibICU = 0 then begin
  211. // fallback implementation
  212. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  213. exit;
  214. end;
  215. err:=0;
  216. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  217. end;
  218. function UpperAnsiString(const s : AnsiString) : AnsiString;
  219. begin
  220. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  221. end;
  222. function LowerAnsiString(const s : AnsiString) : AnsiString;
  223. begin
  224. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  225. end;
  226. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  227. begin
  228. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  229. end;
  230. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  231. begin
  232. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  233. end;
  234. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  235. begin
  236. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  237. end;
  238. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  239. begin
  240. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  241. end;
  242. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  243. var
  244. as1, as2: ansistring;
  245. begin
  246. SetString(as1, S1, MaxLen);
  247. SetString(as2, S2, MaxLen);
  248. Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2));
  249. end;
  250. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  251. var
  252. as1, as2: ansistring;
  253. begin
  254. SetString(as1, S1, MaxLen);
  255. SetString(as2, S2, MaxLen);
  256. Result:=CompareTextUnicodeString(UnicodeString(as1), UnicodeString(as2));
  257. end;
  258. function AnsiStrLower(Str: PChar): PChar;
  259. var
  260. s, res: ansistring;
  261. begin
  262. s:=Str;
  263. res:=LowerAnsiString(s);
  264. if Length(res) > Length(s) then
  265. SetLength(res, Length(s));
  266. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  267. Result:=Str;
  268. end;
  269. function AnsiStrUpper(Str: PChar): PChar;
  270. var
  271. s, res: ansistring;
  272. begin
  273. s:=Str;
  274. res:=UpperAnsiString(s);
  275. if Length(res) > Length(s) then
  276. SetLength(res, Length(s));
  277. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  278. Result:=Str;
  279. end;
  280. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  281. var
  282. c: byte;
  283. begin
  284. // Only UTF-8 encoding is supported
  285. c:=byte(Str^);
  286. if c = 0 then
  287. Result:=0
  288. else begin
  289. Result:=1;
  290. if c < $80 then
  291. exit; // 1-byte ASCII char
  292. while c and $C0 = $C0 do begin
  293. Inc(Result);
  294. c:=c shl 1;
  295. end;
  296. if Result > 6 then
  297. Result:=1 // Invalid code point
  298. else
  299. if Result > MaxLookAead then
  300. Result:=-1; // Incomplete code point
  301. end;
  302. end;
  303. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  304. begin
  305. Result := CP_UTF8; // Android always uses UTF-8
  306. end;
  307. {$i textrec.inc}
  308. procedure SetStdIOCodePage(var T: Text); inline;
  309. begin
  310. case TextRec(T).Mode of
  311. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  312. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  313. end;
  314. end;
  315. procedure SetStdIOCodePages; inline;
  316. begin
  317. SetStdIOCodePage(Input);
  318. SetStdIOCodePage(Output);
  319. SetStdIOCodePage(ErrOutput);
  320. SetStdIOCodePage(StdOut);
  321. SetStdIOCodePage(StdErr);
  322. end;
  323. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  324. var
  325. us: UnicodeString;
  326. begin
  327. Ansi2UnicodeMove(source,cp,us,len);
  328. dest:=us;
  329. end;
  330. function UpperWideString(const s : WideString) : WideString;
  331. begin
  332. Result:=UpperUnicodeString(s);
  333. end;
  334. function LowerWideString(const s : WideString) : WideString;
  335. begin
  336. Result:=LowerUnicodeString(s);
  337. end;
  338. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  339. begin
  340. Result:=CompareUnicodeString(s1, s2);
  341. end;
  342. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  343. begin
  344. Result:=CompareTextUnicodeString(s1, s2);
  345. end;
  346. Procedure SetCWideStringManager;
  347. Var
  348. CWideStringManager : TUnicodeStringManager;
  349. begin
  350. CWideStringManager:=widestringmanager;
  351. With CWideStringManager do
  352. begin
  353. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  354. Ansi2WideMoveProc:=@Ansi2WideMove;
  355. UpperWideStringProc:=@UpperWideString;
  356. LowerWideStringProc:=@LowerWideString;
  357. CompareWideStringProc:=@CompareWideString;
  358. CompareTextWideStringProc:=@CompareTextWideString;
  359. UpperAnsiStringProc:=@UpperAnsiString;
  360. LowerAnsiStringProc:=@LowerAnsiString;
  361. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  362. CompareTextAnsiStringProc:=@AnsiCompareText;
  363. StrCompAnsiStringProc:=@StrCompAnsi;
  364. StrICompAnsiStringProc:=@AnsiStrIComp;
  365. StrLCompAnsiStringProc:=@AnsiStrLComp;
  366. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  367. StrLowerAnsiStringProc:=@AnsiStrLower;
  368. StrUpperAnsiStringProc:=@AnsiStrUpper;
  369. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  370. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  371. UpperUnicodeStringProc:=@UpperUnicodeString;
  372. LowerUnicodeStringProc:=@LowerUnicodeString;
  373. CompareUnicodeStringProc:=@CompareUnicodeString;
  374. CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  375. GetStandardCodePageProc:=@GetStandardCodePage;
  376. CodePointLengthProc:=@CodePointLength;
  377. end;
  378. SetUnicodeStringManager(CWideStringManager);
  379. end;
  380. procedure UnloadICU;
  381. begin
  382. if hlibICUi18n <> 0 then begin
  383. if DefColl <> nil then
  384. ucol_close(DefColl);
  385. UnloadLibrary(hlibICUi18n);
  386. hlibICUi18n:=0;
  387. end;
  388. if hlibICU <> 0 then begin
  389. if DefConv <> nil then
  390. ucnv_close(DefConv);
  391. if LastConv <> nil then
  392. ucnv_close(LastConv);
  393. UnloadLibrary(hlibICU);
  394. hlibICU:=0;
  395. end;
  396. end;
  397. procedure LoadICU;
  398. var
  399. LibVer: ansistring;
  400. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  401. var
  402. p: pointer;
  403. begin
  404. if hLib = 0 then
  405. hLib:=hlibICU;
  406. p:=GetProcedureAddress(hlib, Name + LibVer);
  407. if p = nil then begin
  408. // unload lib on failure
  409. UnloadICU;
  410. Result:=False;
  411. end
  412. else begin
  413. pointer(ProcPtr):=p;
  414. Result:=True;
  415. end;
  416. end;
  417. const
  418. ICUver: array [1..4] of ansistring = ('3_8', '4_2', '44', '46');
  419. TestProcName = 'ucnv_open';
  420. var
  421. err: UErrorCode;
  422. i: longint;
  423. s: ansistring;
  424. begin
  425. hlibICU:=LoadLibrary('libicuuc.so');
  426. hlibICUi18n:=LoadLibrary('libicui18n.so');
  427. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  428. UnloadICU;
  429. exit;
  430. end;
  431. // Finding ICU version using known versions table
  432. for i:=High(ICUver) downto Low(ICUver) do begin
  433. s:='_' + ICUver[i];
  434. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  435. LibVer:=s;
  436. break;
  437. end;
  438. end;
  439. if LibVer = '' then begin
  440. // Finding unknown ICU version
  441. Val(ICUver[High(ICUver)], i);
  442. for i:=i + 1 to 100 do begin
  443. Str(i, s);
  444. s:='_' + s;
  445. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  446. LibVer:=s;
  447. break;
  448. end;
  449. end;
  450. end;
  451. if LibVer = '' then begin
  452. // Trying versionless name
  453. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  454. // Unable to get ICU version
  455. UnloadICU;
  456. exit;
  457. end;
  458. end;
  459. if not _GetProc('ucnv_open', ucnv_open) then exit;
  460. if not _GetProc('ucnv_close', ucnv_close) then exit;
  461. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  462. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  463. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  464. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  465. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  466. if not _GetProc('u_strToLower', u_strToLower) then exit;
  467. if not _GetProc('u_strCompare', u_strCompare) then exit;
  468. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  469. if not _GetProc('u_errorName', u_errorName) then exit;
  470. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  471. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  472. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  473. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  474. DefConv:=OpenConverter('utf8');
  475. err:=0;
  476. DefColl:=ucol_open(nil, err);
  477. if DefColl <> nil then
  478. ucol_setStrength(DefColl, 2);
  479. end;
  480. initialization
  481. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  482. DefaultUnicodeCodePage:=CP_UTF16;
  483. LoadICU;
  484. SetCWideStringManager;
  485. SetStdIOCodePages;
  486. finalization
  487. UnloadICU;
  488. end.