123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 2013 by Yury Sidorov,
- member of the Free Pascal development team.
- Wide string support for Android
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$mode objfpc}
- {$inline on}
- {$implicitexceptions off}
- unit cwstring;
- interface
- procedure SetCWidestringManager;
- implementation
- uses dynlibs;
- type
- UErrorCode = SizeInt;
- int32_t = longint;
- uint32_t = longword;
- PUConverter = pointer;
- PUCollator = pointer;
- UBool = LongBool;
- var
- hlibICU: TLibHandle;
- hlibICUi18n: TLibHandle;
- LibVer: ansistring;
- ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
- ucnv_close: procedure (converter: PUConverter); cdecl;
- ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
- ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
- ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
- ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
- u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
- u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
- u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
- u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
- u_getDataDirectory: function(): PAnsiChar; cdecl;
- u_setDataDirectory: procedure(directory: PAnsiChar); cdecl;
- u_init: procedure(var status: UErrorCode); cdecl;
- ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
- ucol_close: procedure (coll: PUCollator); cdecl;
- ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
- ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
- threadvar
- ThreadDataInited: boolean;
- DefConv, LastConv: PUConverter;
- LastCP: TSystemCodePage;
- DefColl: PUCollator;
- function MaskExceptions: dword;
- begin
- {$if defined(cpux86_64) or defined(cpui386)}
- Result:=GetMXCSR;
- SetMXCSR(Result or %0000000010000000 {MM_MaskInvalidOp} or %0001000000000000 {MM_MaskPrecision});
- {$else}
- Result:=0;
- {$endif}
- end;
- procedure UnmaskExceptions(oldmask: dword);
- begin
- {$if defined(cpux86_64) or defined(cpui386)}
- SetMXCSR(oldmask);
- {$endif}
- end;
- function OpenConverter(const name: ansistring): PUConverter;
- var
- err: UErrorCode;
- oldmask: dword;
- begin
- { ucnv_open() must be called with some SSE exception masked on x86_64-android. }
- oldmask:=MaskExceptions;
- err:=0;
- Result:=ucnv_open(PAnsiChar(name), err);
- if Result <> nil then begin
- ucnv_setSubstChars(Result, '?', 1, err);
- ucnv_setFallback(Result, True);
- end;
- UnmaskExceptions(oldmask);
- end;
- procedure InitThreadData;
- var
- err: UErrorCode;
- col: PUCollator;
- begin
- if (hlibICU = 0) or ThreadDataInited then
- exit;
- ThreadDataInited:=True;
- DefConv:=OpenConverter('utf8');
- err:=0;
- col:=ucol_open(nil, err);
- if col <> nil then
- ucol_setStrength(col, 2);
- DefColl:=col;
- end;
- function GetConverter(cp: TSystemCodePage): PUConverter;
- var
- s: ansistring;
- begin
- if hlibICU = 0 then begin
- Result:=nil;
- exit;
- end;
- InitThreadData;
- if (cp = CP_UTF8) or (cp = CP_ACP) then
- Result:=DefConv
- else begin
- if cp <> LastCP then begin
- Str(cp, s);
- LastConv:=OpenConverter('cp' + s);
- LastCP:=cp;
- end;
- Result:=LastConv;
- end;
- end;
- procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
- var
- len2: SizeInt;
- conv: PUConverter;
- err: UErrorCode;
- begin
- if len = 0 then begin
- dest:='';
- exit;
- end;
- conv:=GetConverter(cp);
- if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
- // fallback implementation
- DefaultUnicode2AnsiMove(source,dest,cp,len);
- exit;
- end;
- len2:=len*3;
- SetLength(dest, len2);
- err:=0;
- if conv <> nil then
- len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
- else begin
- // Use UTF-8 conversion from RTL
- cp:=CP_UTF8;
- len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
- end;
- if len2 > Length(dest) then begin
- SetLength(dest, len2);
- err:=0;
- if conv <> nil then
- len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
- else
- len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
- end;
- if len2 < 0 then
- len2:=0;
- SetLength(dest, len2);
- SetCodePage(dest, cp, False);
- end;
- procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
- var
- len2: SizeInt;
- conv: PUConverter;
- err: UErrorCode;
- begin
- if len = 0 then begin
- dest:='';
- exit;
- end;
- conv:=GetConverter(cp);
- if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
- // fallback implementation
- DefaultAnsi2UnicodeMove(source,cp,dest,len);
- exit;
- end;
- len2:=len;
- SetLength(dest, len2);
- err:=0;
- if conv <> nil then
- len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
- else
- // Use UTF-8 conversion from RTL
- len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
- if len2 > Length(dest) then begin
- SetLength(dest, len2);
- err:=0;
- if conv <> nil then
- len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
- else
- len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
- end;
- if len2 < 0 then
- len2:=0;
- SetLength(dest, len2);
- end;
- function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
- var
- len, len2: SizeInt;
- err: UErrorCode;
- begin
- if hlibICU = 0 then begin
- // fallback implementation
- Result:=UnicodeString(UpCase(AnsiString(s)));
- exit;
- end;
- len:=Length(s);
- SetLength(Result, len);
- if len = 0 then
- exit;
- err:=0;
- len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
- if len2 > len then begin
- SetLength(Result, len2);
- err:=0;
- len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
- end;
- SetLength(Result, len2);
- end;
- function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
- var
- len, len2: SizeInt;
- err: UErrorCode;
- begin
- if hlibICU = 0 then begin
- // fallback implementation
- Result:=UnicodeString(LowerCase(AnsiString(s)));
- exit;
- end;
- len:=Length(s);
- SetLength(Result, len);
- if len = 0 then
- exit;
- err:=0;
- len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
- if len2 > len then begin
- SetLength(Result, len2);
- err:=0;
- len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
- end;
- SetLength(Result, len2);
- end;
- function _CompareStr(const S1, S2: UnicodeString): PtrInt;
- var
- count, count1, count2: SizeInt;
- begin
- result := 0;
- Count1 := Length(S1);
- Count2 := Length(S2);
- if Count1>Count2 then
- Count:=Count2
- else
- Count:=Count1;
- result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
- if result=0 then
- result:=Count1 - Count2;
- end;
- function CompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
- const
- U_COMPARE_CODE_POINT_ORDER = $8000;
- var
- err: UErrorCode;
- begin
- if hlibICU = 0 then begin
- // fallback implementation
- Result:=_CompareStr(s1, s2);
- exit;
- end;
- if (coIgnoreCase in Options) then begin
- err:=0;
- Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
- end
- else begin
- InitThreadData;
- if DefColl <> nil then
- Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
- else
- Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
- end;
- end;
- function UpperAnsiString(const s : AnsiString) : AnsiString;
- begin
- Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
- end;
- function LowerAnsiString(const s : AnsiString) : AnsiString;
- begin
- Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
- end;
- function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
- begin
- Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
- end;
- function StrCompAnsi(s1,s2 : PChar): PtrInt;
- begin
- Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
- end;
- function AnsiCompareText(const S1, S2: ansistring): PtrInt;
- begin
- Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
- end;
- function AnsiStrIComp(S1, S2: PChar): PtrInt;
- begin
- Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
- end;
- function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
- var
- as1, as2: ansistring;
- begin
- SetString(as1, S1, MaxLen);
- SetString(as2, S2, MaxLen);
- Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), []);
- end;
- function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
- var
- as1, as2: ansistring;
- begin
- SetString(as1, S1, MaxLen);
- SetString(as2, S2, MaxLen);
- Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), [coIgnoreCase]);
- end;
- function AnsiStrLower(Str: PChar): PChar;
- var
- s, res: ansistring;
- begin
- s:=Str;
- res:=LowerAnsiString(s);
- if Length(res) > Length(s) then
- SetLength(res, Length(s));
- Move(PAnsiChar(res)^, Str, Length(res) + 1);
- Result:=Str;
- end;
- function AnsiStrUpper(Str: PChar): PChar;
- var
- s, res: ansistring;
- begin
- s:=Str;
- res:=UpperAnsiString(s);
- if Length(res) > Length(s) then
- SetLength(res, Length(s));
- Move(PAnsiChar(res)^, Str, Length(res) + 1);
- Result:=Str;
- end;
- function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
- var
- c: byte;
- begin
- // Only UTF-8 encoding is supported
- c:=byte(Str^);
- if c = 0 then
- Result:=0
- else begin
- Result:=1;
- if c < $80 then
- exit; // 1-byte ASCII char
- while c and $C0 = $C0 do begin
- Inc(Result);
- c:=c shl 1;
- end;
- if Result > 6 then
- Result:=1 // Invalid code point
- else
- if Result > MaxLookAead then
- Result:=-1; // Incomplete code point
- end;
- end;
- function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
- begin
- Result := CP_UTF8; // Android always uses UTF-8
- end;
- procedure SetStdIOCodePage(var T: Text); inline;
- begin
- case TextRec(T).Mode of
- fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
- fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
- end;
- end;
- procedure SetStdIOCodePages; inline;
- begin
- SetStdIOCodePage(Input);
- SetStdIOCodePage(Output);
- SetStdIOCodePage(ErrOutput);
- SetStdIOCodePage(StdOut);
- SetStdIOCodePage(StdErr);
- end;
- procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
- var
- us: UnicodeString;
- begin
- Ansi2UnicodeMove(source,cp,us,len);
- dest:=us;
- end;
- function UpperWideString(const s : WideString) : WideString;
- begin
- Result:=UpperUnicodeString(s);
- end;
- function LowerWideString(const s : WideString) : WideString;
- begin
- Result:=LowerUnicodeString(s);
- end;
- function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
- begin
- Result:=CompareUnicodeString(s1, s2, Options);
- end;
- Procedure SetCWideStringManager;
- Var
- CWideStringManager : TUnicodeStringManager;
- begin
- CWideStringManager:=widestringmanager;
- With CWideStringManager do
- begin
- Wide2AnsiMoveProc:=@Unicode2AnsiMove;
- Ansi2WideMoveProc:=@Ansi2WideMove;
- UpperWideStringProc:=@UpperWideString;
- LowerWideStringProc:=@LowerWideString;
- CompareWideStringProc:=@CompareWideString;
- UpperAnsiStringProc:=@UpperAnsiString;
- LowerAnsiStringProc:=@LowerAnsiString;
- CompareStrAnsiStringProc:=@CompareStrAnsiString;
- CompareTextAnsiStringProc:=@AnsiCompareText;
- StrCompAnsiStringProc:=@StrCompAnsi;
- StrICompAnsiStringProc:=@AnsiStrIComp;
- StrLCompAnsiStringProc:=@AnsiStrLComp;
- StrLICompAnsiStringProc:=@AnsiStrLIComp;
- StrLowerAnsiStringProc:=@AnsiStrLower;
- StrUpperAnsiStringProc:=@AnsiStrUpper;
- Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
- Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
- UpperUnicodeStringProc:=@UpperUnicodeString;
- LowerUnicodeStringProc:=@LowerUnicodeString;
- CompareUnicodeStringProc:=@CompareUnicodeString;
- GetStandardCodePageProc:=@GetStandardCodePage;
- CodePointLengthProc:=@CodePointLength;
- end;
- SetUnicodeStringManager(CWideStringManager);
- end;
- procedure UnloadICU;
- begin
- if DefColl <> nil then
- ucol_close(DefColl);
- if DefConv <> nil then
- ucnv_close(DefConv);
- if LastConv <> nil then
- ucnv_close(LastConv);
- if LibVer = '_3_8' then
- exit; // ICU v3.8 on Android 1.5-2.1 is buggy and can't be unloaded properly
- if hlibICU <> 0 then begin
- UnloadLibrary(hlibICU);
- hlibICU:=0;
- end;
- if hlibICUi18n <> 0 then begin
- UnloadLibrary(hlibICUi18n);
- hlibICUi18n:=0;
- end;
- end;
- function GetIcuProc(const Name: AnsiString; out ProcPtr; libId: longint = 0): boolean;
- var
- p: pointer;
- hLib: TLibHandle;
- begin
- Result:=False;
- if libId = 0 then
- hLib:=hlibICU
- else
- hLib:=hlibICUi18n;
- if hLib = 0 then
- exit;
- p:=GetProcedureAddress(hlib, Name + LibVer);
- if p = nil then
- exit;
- pointer(ProcPtr):=p;
- Result:=True;
- end;
- function LoadICU: boolean;
- const
- ICUver: array [1..12] of ansistring = ('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55', '56', '58', '60');
- TestProcName = 'ucnv_open';
- var
- i: longint;
- s: ansistring;
- dir: PAnsiChar;
- err: UErrorCode;
- begin
- Result:=False;
- {$ifdef android}
- hlibICU:=LoadLibrary('libicuuc.so');
- hlibICUi18n:=LoadLibrary('libicui18n.so');
- {$else}
- hlibICU:=LoadLibrary('icuuc40.dll');
- hlibICUi18n:=LoadLibrary('icuin40.dll');
- LibVer:='_4_0';
- {$endif android}
- if (hlibICU = 0) or (hlibICUi18n = 0) then begin
- UnloadICU;
- exit;
- end;
- // Finding ICU version using known versions table
- for i:=High(ICUver) downto Low(ICUver) do begin
- s:='_' + ICUver[i];
- if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
- LibVer:=s;
- break;
- end;
- end;
- if LibVer = '' then begin
- // Finding unknown ICU version
- Val(ICUver[High(ICUver)], i);
- repeat
- Inc(i);
- Str(i, s);
- s:='_' + s;
- if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
- LibVer:=s;
- break;
- end;
- until i >= 100;
- end;
- if LibVer = '' then begin
- // Trying versionless name
- if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
- // Unable to get ICU version
- SysLogWrite(ANDROID_LOG_ERROR, 'cwstring: Unable to get ICU version.');
- UnloadICU;
- exit;
- end;
- end;
- if not GetIcuProc('ucnv_open', ucnv_open) then exit;
- if not GetIcuProc('ucnv_close', ucnv_close) then exit;
- if not GetIcuProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
- if not GetIcuProc('ucnv_setFallback', ucnv_setFallback) then exit;
- if not GetIcuProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
- if not GetIcuProc('ucnv_toUChars', ucnv_toUChars) then exit;
- if not GetIcuProc('u_strToUpper', u_strToUpper) then exit;
- if not GetIcuProc('u_strToLower', u_strToLower) then exit;
- if not GetIcuProc('u_strCompare', u_strCompare) then exit;
- if not GetIcuProc('u_strCaseCompare', u_strCaseCompare) then exit;
- if not GetIcuProc('u_getDataDirectory', u_getDataDirectory) then exit;
- if not GetIcuProc('u_setDataDirectory', u_setDataDirectory) then exit;
- if not GetIcuProc('u_init', u_init) then exit;
- if not GetIcuProc('ucol_open', ucol_open, 1) then exit;
- if not GetIcuProc('ucol_close', ucol_close, 1) then exit;
- if not GetIcuProc('ucol_strcoll', ucol_strcoll, 1) then exit;
- if not GetIcuProc('ucol_setStrength', ucol_setStrength, 1) then exit;
- // Checking if ICU data dir is set
- dir:=u_getDataDirectory();
- if (dir = nil) or (dir^ = #0) then
- u_setDataDirectory('/system/usr/icu');
- err:=0;
- u_init(err);
- Result:=True;
- end;
- var
- oldm: TUnicodeStringManager;
- {$ifdef android}
- SysGetIcuProc: pointer; external name 'ANDROID_GET_ICU_PROC';
- {$endif android}
- initialization
- GetUnicodeStringManager(oldm);
- DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
- DefaultUnicodeCodePage:=CP_UTF16;
- if LoadICU then begin
- SetCWideStringManager;
- {$ifdef android}
- SysGetIcuProc:=@GetIcuProc;
- SetStdIOCodePages;
- {$endif android}
- end;
- finalization
- SetUnicodeStringManager(oldm);
- UnloadICU;
- end.
|