cwstring.pp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by Yury Sidorov,
  4. member of the Free Pascal development team.
  5. Wide string support for Android
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$inline on}
  14. {$implicitexceptions off}
  15. unit cwstring;
  16. interface
  17. procedure SetCWidestringManager;
  18. implementation
  19. uses dynlibs;
  20. type
  21. UErrorCode = SizeInt;
  22. int32_t = longint;
  23. uint32_t = longword;
  24. PUConverter = pointer;
  25. PUCollator = pointer;
  26. UBool = LongBool;
  27. var
  28. hlibICU: TLibHandle;
  29. hlibICUi18n: TLibHandle;
  30. ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
  31. ucnv_close: procedure (converter: PUConverter); cdecl;
  32. ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
  33. ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
  34. ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  35. ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  36. u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  37. u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
  38. u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
  39. u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
  40. ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
  41. ucol_close: procedure (coll: PUCollator); cdecl;
  42. ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
  43. ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
  44. u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
  45. DefConv, LastConv: PUConverter;
  46. LastCP: TSystemCodePage;
  47. DefColl: PUCollator;
  48. function OpenConverter(const name: ansistring): PUConverter;
  49. var
  50. err: UErrorCode;
  51. begin
  52. err:=0;
  53. Result:=ucnv_open(PAnsiChar(name), err);
  54. if Result <> nil then begin
  55. ucnv_setSubstChars(Result, '?', 1, err);
  56. ucnv_setFallback(Result, True);
  57. end;
  58. end;
  59. function GetConverter(cp: TSystemCodePage): PUConverter;
  60. var
  61. s: ansistring;
  62. begin
  63. if hlibICU = 0 then begin
  64. Result:=nil;
  65. exit;
  66. end;
  67. if (cp = DefaultSystemCodePage) or (cp = CP_ACP) then
  68. Result:=DefConv
  69. else begin
  70. if cp <> LastCP then begin
  71. Str(cp, s);
  72. LastConv:=OpenConverter('cp' + s);
  73. LastCP:=cp;
  74. end;
  75. Result:=LastConv;
  76. end;
  77. end;
  78. procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
  79. var
  80. len2: SizeInt;
  81. conv: PUConverter;
  82. err: UErrorCode;
  83. begin
  84. if len = 0 then begin
  85. dest:='';
  86. exit;
  87. end;
  88. conv:=GetConverter(cp);
  89. if conv = nil then begin
  90. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  91. exit;
  92. end;
  93. len2:=len*3;
  94. SetLength(dest, len2);
  95. err:=0;
  96. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  97. if len2 > Length(dest) then begin
  98. SetLength(dest, len2);
  99. err:=0;
  100. len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
  101. end;
  102. SetLength(dest, len2);
  103. SetCodePage(dest, cp, False);
  104. end;
  105. procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  106. var
  107. len2: SizeInt;
  108. conv: PUConverter;
  109. err: UErrorCode;
  110. begin
  111. if len = 0 then begin
  112. dest:='';
  113. exit;
  114. end;
  115. conv:=GetConverter(cp);
  116. if conv = nil then begin
  117. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  118. exit;
  119. end;
  120. len2:=len;
  121. SetLength(dest, len2);
  122. err:=0;
  123. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  124. if len2 > Length(dest) then begin
  125. SetLength(dest, len2);
  126. err:=0;
  127. len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
  128. end;
  129. SetLength(dest, len2);
  130. end;
  131. function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
  132. var
  133. len, len2: SizeInt;
  134. err: UErrorCode;
  135. begin
  136. if hlibICU = 0 then begin
  137. // fallback implementation
  138. Result:=UnicodeString(UpCase(AnsiString(s)));
  139. exit;
  140. end;
  141. len:=Length(s);
  142. SetLength(Result, len);
  143. if len = 0 then
  144. exit;
  145. err:=0;
  146. len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  147. if len2 > len then begin
  148. SetLength(Result, len2);
  149. err:=0;
  150. len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  151. end;
  152. SetLength(Result, len2);
  153. end;
  154. function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
  155. var
  156. len, len2: SizeInt;
  157. err: UErrorCode;
  158. begin
  159. if hlibICU = 0 then begin
  160. // fallback implementation
  161. Result:=UnicodeString(LowerCase(AnsiString(s)));
  162. exit;
  163. end;
  164. len:=Length(s);
  165. SetLength(Result, len);
  166. if len = 0 then
  167. exit;
  168. err:=0;
  169. len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
  170. if len2 > len then begin
  171. SetLength(Result, len2);
  172. err:=0;
  173. len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
  174. end;
  175. SetLength(Result, len2);
  176. end;
  177. function _CompareStr(const S1, S2: UnicodeString): PtrInt;
  178. var
  179. count, count1, count2: SizeInt;
  180. begin
  181. result := 0;
  182. Count1 := Length(S1);
  183. Count2 := Length(S2);
  184. if Count1>Count2 then
  185. Count:=Count2
  186. else
  187. Count:=Count1;
  188. result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
  189. if result=0 then
  190. result:=Count1 - Count2;
  191. end;
  192. function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  193. begin
  194. if hlibICU = 0 then begin
  195. // fallback implementation
  196. Result:=_CompareStr(s1, s2);
  197. exit;
  198. end;
  199. if DefColl <> nil then
  200. Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
  201. else
  202. Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
  203. end;
  204. function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
  205. const
  206. U_COMPARE_CODE_POINT_ORDER = $8000;
  207. var
  208. err: UErrorCode;
  209. begin
  210. if hlibICU = 0 then begin
  211. // fallback implementation
  212. Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
  213. exit;
  214. end;
  215. err:=0;
  216. Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
  217. end;
  218. function UpperAnsiString(const s : AnsiString) : AnsiString;
  219. begin
  220. Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
  221. end;
  222. function LowerAnsiString(const s : AnsiString) : AnsiString;
  223. begin
  224. Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
  225. end;
  226. function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
  227. begin
  228. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  229. end;
  230. function StrCompAnsi(s1,s2 : PChar): PtrInt;
  231. begin
  232. Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
  233. end;
  234. function AnsiCompareText(const S1, S2: ansistring): PtrInt;
  235. begin
  236. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  237. end;
  238. function AnsiStrIComp(S1, S2: PChar): PtrInt;
  239. begin
  240. Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
  241. end;
  242. function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  243. begin
  244. Result:=CompareUnicodeString(UnicodeString(Copy(s1, 1, MaxLen)), UnicodeString(Copy(s2, 1, MaxLen)));
  245. end;
  246. function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  247. begin
  248. Result:=CompareTextUnicodeString(UnicodeString(Copy(s1, 1, MaxLen)), UnicodeString(Copy(s2, 1, MaxLen)));
  249. end;
  250. function AnsiStrLower(Str: PChar): PChar;
  251. var
  252. s, res: ansistring;
  253. begin
  254. s:=Str;
  255. res:=LowerAnsiString(s);
  256. if Length(res) > Length(s) then
  257. SetLength(res, Length(s));
  258. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  259. Result:=Str;
  260. end;
  261. function AnsiStrUpper(Str: PChar): PChar;
  262. var
  263. s, res: ansistring;
  264. begin
  265. s:=Str;
  266. res:=UpperAnsiString(s);
  267. if Length(res) > Length(s) then
  268. SetLength(res, Length(s));
  269. Move(PAnsiChar(res)^, Str, Length(res) + 1);
  270. Result:=Str;
  271. end;
  272. function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  273. var
  274. c: byte;
  275. begin
  276. // Only UTF-8 encoding is supported
  277. c:=byte(Str^);
  278. if c = 0 then
  279. Result:=0
  280. else begin
  281. Result:=1;
  282. if c < $80 then
  283. exit; // 1-byte ASCII char
  284. while c and $C0 = $C0 do begin
  285. Inc(Result);
  286. c:=c shl 1;
  287. end;
  288. if Result > 6 then
  289. Result:=1 // Invalid code point
  290. else
  291. if Result > MaxLookAead then
  292. Result:=-1; // Incomplete code point
  293. end;
  294. end;
  295. function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  296. begin
  297. Result := CP_UTF8; // Android always uses UTF-8
  298. end;
  299. {$i textrec.inc}
  300. procedure SetStdIOCodePage(var T: Text); inline;
  301. begin
  302. case TextRec(T).Mode of
  303. fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
  304. fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
  305. end;
  306. end;
  307. procedure SetStdIOCodePages; inline;
  308. begin
  309. SetStdIOCodePage(Input);
  310. SetStdIOCodePage(Output);
  311. SetStdIOCodePage(ErrOutput);
  312. SetStdIOCodePage(StdOut);
  313. SetStdIOCodePage(StdErr);
  314. end;
  315. procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
  316. var
  317. us: UnicodeString;
  318. begin
  319. Ansi2UnicodeMove(source,cp,us,len);
  320. dest:=us;
  321. end;
  322. function UpperWideString(const s : WideString) : WideString;
  323. begin
  324. Result:=UpperUnicodeString(s);
  325. end;
  326. function LowerWideString(const s : WideString) : WideString;
  327. begin
  328. Result:=LowerUnicodeString(s);
  329. end;
  330. function CompareWideString(const s1, s2 : WideString) : PtrInt;
  331. begin
  332. Result:=CompareUnicodeString(s1, s2);
  333. end;
  334. function CompareTextWideString(const s1, s2 : WideString): PtrInt;
  335. begin
  336. Result:=CompareTextUnicodeString(s1, s2);
  337. end;
  338. Procedure SetCWideStringManager;
  339. Var
  340. CWideStringManager : TUnicodeStringManager;
  341. begin
  342. CWideStringManager:=widestringmanager;
  343. With CWideStringManager do
  344. begin
  345. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  346. Ansi2WideMoveProc:=@Ansi2WideMove;
  347. UpperWideStringProc:=@UpperWideString;
  348. LowerWideStringProc:=@LowerWideString;
  349. CompareWideStringProc:=@CompareWideString;
  350. CompareTextWideStringProc:=@CompareTextWideString;
  351. UpperAnsiStringProc:=@UpperAnsiString;
  352. LowerAnsiStringProc:=@LowerAnsiString;
  353. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  354. CompareTextAnsiStringProc:=@AnsiCompareText;
  355. StrCompAnsiStringProc:=@StrCompAnsi;
  356. StrICompAnsiStringProc:=@AnsiStrIComp;
  357. StrLCompAnsiStringProc:=@AnsiStrLComp;
  358. StrLICompAnsiStringProc:=@AnsiStrLIComp;
  359. StrLowerAnsiStringProc:=@AnsiStrLower;
  360. StrUpperAnsiStringProc:=@AnsiStrUpper;
  361. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  362. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  363. UpperUnicodeStringProc:=@UpperUnicodeString;
  364. LowerUnicodeStringProc:=@LowerUnicodeString;
  365. CompareUnicodeStringProc:=@CompareUnicodeString;
  366. CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
  367. GetStandardCodePageProc:=@GetStandardCodePage;
  368. CodePointLengthProc:=@CodePointLength;
  369. end;
  370. SetUnicodeStringManager(CWideStringManager);
  371. end;
  372. procedure UnloadICU;
  373. begin
  374. if hlibICUi18n <> 0 then begin
  375. if DefColl <> nil then
  376. ucol_close(DefColl);
  377. UnloadLibrary(hlibICUi18n);
  378. hlibICUi18n:=0;
  379. end;
  380. if hlibICU <> 0 then begin
  381. if DefConv <> nil then
  382. ucnv_close(DefConv);
  383. if LastConv <> nil then
  384. ucnv_close(LastConv);
  385. UnloadLibrary(hlibICU);
  386. hlibICU:=0;
  387. end;
  388. end;
  389. procedure LoadICU;
  390. var
  391. LibVer: ansistring;
  392. function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
  393. var
  394. p: pointer;
  395. begin
  396. if hLib = 0 then
  397. hLib:=hlibICU;
  398. p:=GetProcedureAddress(hlib, Name + LibVer);
  399. if p = nil then begin
  400. // unload lib on failure
  401. UnloadICU;
  402. Result:=False;
  403. end
  404. else begin
  405. pointer(ProcPtr):=p;
  406. Result:=True;
  407. end;
  408. end;
  409. const
  410. ICUver: array [1..4] of ansistring = ('3_8', '4_2', '44', '46');
  411. TestProcName = 'ucnv_open';
  412. var
  413. err: UErrorCode;
  414. i: longint;
  415. s: ansistring;
  416. begin
  417. hlibICU:=LoadLibrary('libicuuc.so');
  418. hlibICUi18n:=LoadLibrary('libicui18n.so');
  419. if (hlibICU = 0) or (hlibICUi18n = 0) then begin
  420. UnloadICU;
  421. exit;
  422. end;
  423. // Finding ICU version using known versions table
  424. for i:=High(ICUver) downto Low(ICUver) do begin
  425. s:='_' + ICUver[i];
  426. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  427. LibVer:=s;
  428. break;
  429. end;
  430. end;
  431. if LibVer = '' then begin
  432. // Finding unknown ICU version
  433. Val(ICUver[High(ICUver)], i);
  434. for i:=i + 1 to 100 do begin
  435. Str(i, s);
  436. s:='_' + s;
  437. if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
  438. LibVer:=s;
  439. break;
  440. end;
  441. end;
  442. end;
  443. if LibVer = '' then begin
  444. // Trying versionless name
  445. if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
  446. // Unable to get ICU version
  447. UnloadICU;
  448. exit;
  449. end;
  450. end;
  451. if not _GetProc('ucnv_open', ucnv_open) then exit;
  452. if not _GetProc('ucnv_close', ucnv_close) then exit;
  453. if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
  454. if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
  455. if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
  456. if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
  457. if not _GetProc('u_strToUpper', u_strToUpper) then exit;
  458. if not _GetProc('u_strToLower', u_strToLower) then exit;
  459. if not _GetProc('u_strCompare', u_strCompare) then exit;
  460. if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
  461. if not _GetProc('u_errorName', u_errorName) then exit;
  462. if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
  463. if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
  464. if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
  465. if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
  466. DefConv:=OpenConverter('utf8');
  467. err:=0;
  468. DefColl:=ucol_open(nil, err);
  469. if DefColl <> nil then
  470. ucol_setStrength(DefColl, 2);
  471. end;
  472. initialization
  473. DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
  474. DefaultUnicodeCodePage:=CP_UTF16;
  475. LoadICU;
  476. SetCWideStringManager;
  477. SetStdIOCodePages;
  478. finalization
  479. UnloadICU;
  480. end.