12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 1999-2005 by Florian Klaempfl,
- Copyright (c) 2011 by Jonas Maebe,
- members of the Free Pascal development team.
- This file implements support routines for UnicodeStrings (java.lang.String) with FPC on the JVM target
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- { unicodestring is a plain java.lang.String }
- {$define FPC_UNICODESTRING_TYPE_DEFINED}
- { helpers for converting between Windows and Java code page identifiers }
- {$i jwin2javacharset.inc}
- {$define FPC_HAS_DEFAULT_UNICODE_2_ANSI_MOVE}
{ Encodes len UTF-16 code units starting at source into dest, using the
  charset encoder that widestringmanager keeps for code page cp.  Malformed
  and unmappable input is replaced instead of raising an error.  Afterwards
  dest is tagged with cp via SetCodePage (third argument false). }
procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  var
    enc: JNCCharsetEncoder;
    chars: JNCharBuffer;
    bytes: JNByteBuffer;
  begin
    { fetch the encoder for this code page and put it in a known state }
    enc:=widestringmanager.encoder.getForCodePage(cp);
    enc.reset;
    enc.onMalformedInput(JNCCodingErrorAction.fREPLACE);
    enc.onUnmappableCharacter(JNCCodingErrorAction.fREPLACE);
    { view the source characters as a buffer and encode them in one go }
    chars:=JNCharBuffer.wrap(TJCharArray(source),0,len);
    bytes:=enc.encode(chars);
    { the returned buffer has position zero and its limit follows the last
      byte written, so the limit is the number of encoded bytes }
    setlength(dest,bytes.limit);
    bytes.get(TJByteArray(AnsiStringClass(dest).fdata),0,bytes.limit);
    { the terminating zero is already present because of setlength }
    SetCodePage(dest,cp,false);
  end;
- {$define FPC_HAS_DEFAULT_ANSI_2_UNICODE}
{ Decodes len bytes starting at source, interpreted in code page cp, into the
  unicodestring dest.  The decoder comes from widestringmanager; malformed and
  unmappable input is replaced instead of raising an error. }
procedure DefaultAnsi2UnicodeMove(source:PAnsiChar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  var
    dec: JNCCharsetDecoder;
    bytes: JNByteBuffer;
    chars: JNCharBuffer;
  begin
    { fetch the decoder for this code page and put it in a known state }
    dec:=widestringmanager.decoder.getForCodePage(cp);
    dec.reset;
    dec.onMalformedInput(JNCCodingErrorAction.fREPLACE);
    dec.onUnmappableCharacter(JNCCodingErrorAction.fREPLACE);
    { wrap the raw bytes and decode them in one go }
    bytes:=JNByteBuffer.wrap(TJByteArray(source),0,len);
    chars:=dec.decode(bytes);
    dest:=chars.toString;
  end;
- {
- This file contains the implementation of the UnicodeString type,
- which on the Java platforms is an alias for java.lang.String
- }
- {$define FPC_HAS_NEW_UNICODESTRING}
Function NewUnicodeString(Len : SizeInt) : JLString;
{
  Creates a new java.lang.String from a jchar array that has just been
  sized to Len elements.
}
  var
    buffer: array of jchar;
  begin
    setlength(buffer,len);
    NewUnicodeString:=JLString.create(buffer);
  end;
- { lie, not required }
- {$define FPC_HAS_UNICODESTR_DECR_REF}
- {$define FPC_HAS_UNICODESTR_INCR_REF}
- {$define FPC_HAS_UNICODESTR_TO_SHORTSTR}
procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
{
  Converts a UnicodeString to a ShortString by going through an
  intermediate ansistring.  An empty S2 yields an empty res.
}
  Var
    viaAnsi : ansistring;
  begin
    res:='';
    if Length(S2)<=0 then
      exit;
    { unicodestring -> ansistring -> shortstring }
    viaAnsi:=s2;
    res:=viaAnsi;
  end;
- {$define FPC_HAS_SHORTSTR_TO_UNICODESTR}
Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
{
  Converts a ShortString to a UnicodeString, decoding its bytes with
  DefaultSystemCodePage.  An empty S2 yields an empty result.
}
  Var
    byteCount : SizeInt;
  begin
    fpc_ShortStr_To_UnicodeStr:='';
    byteCount:=Length(S2);
    if byteCount<=0 then
      exit;
    widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(ShortstringClass(@S2).fdata),DefaultSystemCodePage,result,byteCount);
  end;
- {$define FPC_HAS_UNICODESTR_TO_ANSISTR}
Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
{
  Converts a UnicodeString to an AnsiString in code page cp, after cp has
  been passed through TranslatePlaceholderCP.
}
  begin
    cp:=TranslatePlaceholderCP(cp);
    { assign through an untyped pointer so that no additional code page
      conversion is inserted for the AnsiString result }
    pointer(fpc_UnicodeStr_To_AnsiStr):=pointer(AnsistringClass.Create(s2,cp));
  end;
- {$define FPC_HAS_ANSISTR_TO_UNICODESTR}
Function fpc_AnsiStr_To_UnicodeStr (Const S2 : RawByteString): UnicodeString; compilerproc;
{
  Converts an AnsiString to a UnicodeString via the toString method of its
  AnsistringClass representation.  An empty S2 yields an empty result.
}
  begin
    if length(s2)<>0 then
      fpc_AnsiStr_To_UnicodeStr:=AnsistringClass(S2).toString
    else
      fpc_AnsiStr_To_UnicodeStr:='';
  end;
- {$define FPC_HAS_UNICODESTR_TO_WIDESTR}
{ Converts a UnicodeString to a WideString by plain assignment. }
Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
  begin
    fpc_UnicodeStr_To_WideStr:=s2;
  end;
- {$define FPC_HAS_WIDESTR_TO_UNICODESTR}
{ Converts a WideString to a UnicodeString by plain assignment. }
Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
  begin
    fpc_WideStr_To_UnicodeStr:=s2;
  end;
- {$define FPC_HAS_PWIDECHAR_TO_UNICODESTR}
Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
{
  Converts a #0-terminated wide character buffer to a UnicodeString.
  A nil pointer or a buffer that starts with #0 yields an empty string.
}
  var
    Size : SizeInt;
  begin
    result:='';
    if p=nil then
      exit;
    { determine the length up to (not including) the terminating #0 }
    size:=0;
    while p[size]<>#0 do
      inc(size);
    { build the string directly; a preceding SetLength would only allocate
      a temporary string that is overwritten right away }
    if Size>0 then
      result:=JLString.Create(TJCharArray(p),0,Size);
  end;
- {$define FPC_HAS_UNICODESTR_OF_CHAR}
Function StringOfChar(c : Unicodechar;l : SizeInt) : UnicodeString;
{
  Returns a UnicodeString consisting of l copies of c, or an empty string
  when l is not positive.
}
  var
    arr : array of jchar;
  begin
    if l>0 then
      begin
        SetLength(arr,l);
        FillWord(arr,l,word(c));
        { build the string directly from the filled array; a preceding
          SetLength on the result would only allocate a temporary string
          that is overwritten right away }
        StringOfChar:=JLString.Create(TJCharArray(arr),0,l);
      end
    else
      StringOfChar:='';
  end;
- { lie, not used by compiler }
- {$define FPC_HAS_PUNICODECHAR_TO_SHORTSTR}
- {$define FPC_HAS_PWIDECHAR_TO_ANSISTR}
{ Converts a #0-terminated wide character buffer to an AnsiString in code
  page cp (after TranslatePlaceholderCP).  A nil pointer or a buffer that
  starts with #0 yields an empty string. }
Function fpc_PWideChar_To_AnsiStr(const p : pwidechar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): ansistring; compilerproc;
  begin
    fpc_PWideChar_To_AnsiStr:='';
    if p=nil then
      exit;
    if p^=#0 then
      exit;
    cp:=TranslatePlaceholderCP(cp);
    pointer(fpc_PWideChar_To_AnsiStr):=pointer(AnsistringClass.Create(unicodestring(p),cp));
  end;
- {$define FPC_HAS_PWIDECHAR_TO_SHORTSTR}
{ Converts a #0-terminated wide character buffer to a ShortString.  A nil
  pointer or a buffer that starts with #0 yields an empty string. }
procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
  begin
    res:='';
    if p=nil then
      exit;
    if p^=#0 then
      exit;
    res:=unicodestring(p);
  end;
- { lie, not required for JVM target }
- {$define FPC_HAS_UNICODESTR_ASSIGN}
- {$define FPC_HAS_UNICODESTR_CONCAT}
{ Stores the concatenation of S1 and S2 in DestS.  When either operand is
  empty the other one is assigned as-is, otherwise a StringBuilder is used. }
procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
  Var
    builder: JLStringBuilder;
  begin
    if length(S1)=0 then
      { nothing on the left: plain assignment is enough }
      DestS:=s2
    else if length(S2)=0 then
      { nothing on the right: plain assignment is enough }
      DestS:=s1
    else
      begin
        builder:=JLStringBuilder.create(S1);
        builder.append(s2);
        DestS:=builder.toString;
      end;
  end;
- {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
{ Stores the concatenation of all strings in sarr, in order, in DestS. }
procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
  Var
    idx : Longint;
    totalLen : SizeInt;
    builder: JLStringBuilder;
  begin
    { sum up the lengths first so that the StringBuilder can be created
      with the final capacity }
    totalLen:=0;
    for idx:=low(sarr) to high(sarr) do
      inc(totalLen,length(sarr[idx]));
    builder:=JLStringBuilder.create(totalLen);
    { append every non-empty part }
    for idx:=low(sarr) to high(sarr) do
      if length(sarr[idx])>0 then
        builder.append(sarr[idx]);
    dests:=builder.toString;
  end;
- {$define FPC_HAS_CHAR_TO_UCHAR}
{ Converts an AnsiChar to a UnicodeChar by decoding it with
  DefaultSystemCodePage and returning the first character of the result. }
Function fpc_Char_To_UChar(const c : AnsiChar): UnicodeChar; compilerproc;
  var
    single: array[0..0] of ansichar;
    decoded: unicodestring;
  begin
    single[0]:=c;
    widestringmanager.Ansi2UnicodeMoveProc(pansichar(@single),DefaultSystemCodePage,decoded,1);
    result:=decoded[1];
  end;
- {$define FPC_HAS_CHAR_TO_UNICODESTR}
Function fpc_Char_To_UnicodeStr(const c : AnsiChar): UnicodeString; compilerproc;
{
  Converts an AnsiChar to a UnicodeString by decoding the single byte with
  DefaultSystemCodePage.
}
  var
    single: array[0..0] of ansichar;
  begin
    single[0]:=c;
    widestringmanager.Ansi2UnicodeMoveProc(pansichar(@single),DefaultSystemCodePage,fpc_Char_To_UnicodeStr,1);
  end;
- {$define FPC_HAS_UCHAR_TO_CHAR}
Function fpc_UChar_To_Char(const c : UnicodeChar): AnsiChar; compilerproc;
{
  Converts a UnicodeChar to an AnsiChar by encoding it with
  DefaultSystemCodePage.  When the encoded form is not exactly one byte
  long, '?' is returned instead.
}
  var
    encoded: RawByteString;
    single: array[0..0] of unicodechar;
  begin
    single[0]:=c;
    widestringmanager.Unicode2AnsiMoveProc(punicodechar(@single), encoded, DefaultSystemCodePage, 1);
    if length(encoded)<>1 then
      result:='?'
    else
      result:=encoded[1];
  end;
- { lie, unused for this target since widechar = unicodechar }
- {$define FPC_HAS_UCHAR_TO_UNICODESTR}
Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
{
  Converts a UnicodeChar to a UnicodeString holding just that character.
}
  var
    single: array[0..0] of UnicodeChar;
  begin
    single[0]:=c;
    fpc_UChar_To_UnicodeStr:=JLString.create(single);
  end;
- {$define FPC_HAS_UCHAR_TO_ANSISTR}
Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
{
  Converts a UnicodeChar to an AnsiString in code page cp (after
  TranslatePlaceholderCP).
}
  var
    single: array[0..0] of unicodechar;
  begin
    cp:=TranslatePlaceholderCP(cp);
    single[0]:=c;
    { encode straight into the function result }
    widestringmanager.Unicode2AnsiMoveProc(punicodechar(@single), RawByteString(result), cp, 1);
  end;
- {$define FPC_HAS_UCHAR_TO_SHORTSTR}
function fpc_UChar_To_ShortStr(const c : UnicodeChar): shortstring; compilerproc;
{
  Converts a UnicodeChar to a ShortString by going through an intermediate
  unicodestring.
}
  var
    asString: unicodestring;
  begin
    asString:=c;
    fpc_UChar_To_ShortStr:=asString;
  end;
{$ifndef FPC_HAS_UCHAR_TO_ANSISTR}
{$define FPC_HAS_UCHAR_TO_ANSISTR}
{ Fallback that is only compiled when FPC_HAS_UCHAR_TO_ANSISTR has not been
  defined yet. }
Function fpc_UChar_To_AnsiStr(const c : UnicodeChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
{
  Converts a UnicodeChar to an AnsiString.  Without FPC_HAS_CPSTRING the
  target code page is DefaultSystemCodePage; in either case it is passed
  through TranslatePlaceholderCP first.
}
  var
    single: array[0..0] of unicodechar;
{$ifndef FPC_HAS_CPSTRING}
    cp : TSystemCodePage;
{$endif FPC_HAS_CPSTRING}
  begin
{$ifndef FPC_HAS_CPSTRING}
    cp:=DefaultSystemCodePage;
{$endif FPC_HAS_CPSTRING}
    cp:=TranslatePlaceholderCP(cp);
    single[0]:=c;
    widestringmanager.Unicode2AnsiMoveProc(punicodechar(@single[0]), result, cp, 1);
  end;
{$endif FPC_HAS_UCHAR_TO_ANSISTR}
- {$define FPC_HAS_PCHAR_TO_UNICODESTR}
{ Converts a #0-terminated AnsiChar buffer to a UnicodeString, decoding it
  with DefaultSystemCodePage.  A nil pointer or a buffer that starts with #0
  yields an empty string. }
Function fpc_PChar_To_UnicodeStr(const p : PAnsiChar): UnicodeString; compilerproc;
  var
    i: longint;
    arr: TAnsiCharArray;
  begin
    result:='';
    { guard against nil like the PWideChar converters do, instead of
      failing while scanning for the terminator }
    if p=nil then
      exit;
    arr:=TAnsiCharArray(p);
    { determine the length up to (not including) the terminating #0 }
    i:=0;
    while arr[i]<>#0 do
      inc(i);
    if i<>0 then
      widestringmanager.Ansi2UnicodeMoveProc(P,DefaultSystemCodePage,result,i);
  end;
{ Builds a UnicodeString from arr.  With zerobased set, only the characters
  before the first #0 are used (all of them if there is no #0); otherwise
  the whole array is used. }
Function real_widechararray_to_unicodestr(const arr: array of widechar; zerobased: boolean): Unicodestring;
  var
    count : SizeInt;
  begin
    if zerobased then
      begin
        { advance until the first #0 or the end of the array }
        count:=low(arr);
        while (count<=high(arr)) and (arr[count]<>#0) do
          inc(count);
      end
    else
      count:=high(arr)+1;
    real_widechararray_to_unicodestr:=JLString.create(arr,0,count);
  end;
- {$define FPC_HAS_WIDECHARARRAY_TO_UNICODESTR}
{ Compiler helper: converts a widechar array to a UnicodeString by
  delegating to real_widechararray_to_unicodestr. }
Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
  begin
    fpc_WideCharArray_To_UnicodeStr:=real_widechararray_to_unicodestr(arr,zerobased);
  end;
- { due to their names, the following procedures should be in wstrings.inc,
- however, the compiler generates code using this functions on all platforms }
- {$define FPC_HAS_WIDECHARARRAY_TO_SHORTSTR}
{ Compiler helper: converts a widechar array to a ShortString by way of
  real_widechararray_to_unicodestr. }
procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
  var
    asUnicode: unicodestring;
  begin
    asUnicode:=real_widechararray_to_unicodestr(arr,zerobased);
    res:=asUnicode;
  end;
- {$define FPC_HAS_WIDECHARARRAY_TO_WIDESTR}
{ Compiler helper: converts a widechar array to a WideString by delegating
  to real_widechararray_to_unicodestr. }
Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
  begin
    fpc_WideCharArray_To_WideStr:=real_widechararray_to_unicodestr(arr,zerobased);
  end;
- {$define FPC_HAS_UNICODESTR_TO_CHARARRAY}
procedure fpc_unicodestr_to_chararray(out res: array of AnsiChar; const src: UnicodeString); compilerproc;
{
  Encodes src in the charset that win2javacs returns for
  DefaultSystemCodePage (US-ASCII when it reports '<unsupported>'), copies
  as many of the resulting bytes as fit into res and zero-fills all
  remaining elements of res.
}
  var
    len: longint;
    temp: array of jbyte;
    csname: unicodestring;
  begin
    len:=length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len>0 then
      begin
        csname:=win2javacs(DefaultSystemCodePage);
        if csname='<unsupported>' then
          csname:='US-ASCII';
        temp:=JLString(src).getBytes(csname);
        { never copy more bytes than res can hold }
        len:=length(temp);
        if len>length(res) then
          len:=length(res);
        JLSystem.ArrayCopy(JLObject(temp),0,JLObject(@res),0,len);
      end;
    { java.util.Arrays.fill takes an exclusive end index, so high(res)+1 is
      required to clear the last element as well }
    if len<=high(res) then
      JUArrays.fill(TJByteArray(@res),len,high(res)+1,0);
  end;
{ Returns a copy of s in which the character at the 1-based position index
  has been replaced by ch. }
function fpc_unicodestr_setchar(const s: UnicodeString; const index: longint; const ch: unicodechar): UnicodeString; compilerproc;
  var
    builder: JLStringBuilder;
  begin
    builder:=JLStringBuilder.create(s);
    { Pascal string positions start at 1, StringBuilder positions at 0 }
    builder.setCharAt(index-1,ch);
    fpc_unicodestr_setchar:=builder.toString();
  end;
- {$define FPC_HAS_ANSISTR_TO_WIDECHARARRAY}
{ Converts src to a widestring, copies as many of its characters as fit
  into res and fills all remaining elements of res with #0. }
procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: RawByteString); compilerproc;
  var
    len: SizeInt;
    temp: widestring;
  begin
    len := length(src);
    { make sure we don't dereference src if it can be nil (JM) }
    if len > 0 then
      temp:=src;
    { never copy more characters than res can hold }
    len := length(temp);
    if len > high(res)+1 then
      len := high(res)+1;

    JLString(temp).getChars(0,len,res,0);
    { java.util.Arrays.fill takes an exclusive end index: high(res)+1 also
      clears the last element and keeps fromIndex<=toIndex when the string
      fills the whole array (fromIndex>toIndex raises an exception) }
    JUArrays.fill(res,len,high(res)+1,#0);
  end;
- {$define FPC_HAS_SHORTSTR_TO_WIDECHARARRAY}
{ Converts src to a unicodestring, copies as many of its characters as fit
  into res and fills all remaining elements of res with #0. }
procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
  var
    len: longint;
    temp : unicodestring;
  begin
    len := length(src);
    { temp is initialized with an empty string, so no need to convert src in
      case it's also empty }
    if len > 0 then
      temp:=src;
    { never copy more characters than res can hold }
    len := length(temp);
    if len > high(res)+1 then
      len := high(res)+1;

    JLString(temp).getChars(0,len,res,0);
    { java.util.Arrays.fill takes an exclusive end index: high(res)+1 also
      clears the last element and keeps fromIndex<=toIndex when the string
      fills the whole array (fromIndex>toIndex raises an exception) }
    JUArrays.fill(res,len,high(res)+1,#0);
  end;
- {$define FPC_HAS_UNICODESTR_TO_WIDECHARARRAY}
{ Copies as many characters of src as fit into res and fills all remaining
  elements of res with #0, like the ansistring and shortstring variants. }
procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
  var
    len: SizeInt;
  begin
    { never copy more characters than res can hold }
    len := length(src);
    if len > length(res) then
      len := length(res);
    JLString(src).getChars(0,len,res,0);
    { clear the tail; java.util.Arrays.fill takes an exclusive end index }
    JUArrays.fill(res,len,high(res)+1,#0);
  end;
- {$define FPC_HAS_UNICODESTR_COMPARE}
Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt; compilerproc;
{
  Compares two UnicodeStrings.
  The result is
    <0 if S1<S2
     0 if S1=S2
    >0 if S1>S2
}
  begin
    { identical references are equal by definition; everything else is
      decided by String.compareTo }
    if JLObject(S1)<>JLObject(S2) then
      fpc_UnicodeStr_Compare:=JLString(S1).compareTo(S2)
    else
      fpc_UnicodeStr_Compare:=0;
  end;
- {$define FPC_HAS_UNICODESTR_COMPARE_EQUAL}
Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt; compilerproc;
{
  Compares two UnicodeStrings for equality only, using String.equals.
  The result is
     0 if S1=S2
     1 if S1<>S2
}
  begin
    if JLString(S1).equals(JLString(S2)) then
      fpc_UnicodeStr_Compare_Equal:=0
    else
      fpc_UnicodeStr_Compare_Equal:=1;
  end;
- { lie, not required for this target }
- {$define FPC_HAS_UNICODESTR_RANGECHECK}
- {$define FPC_HAS_UNICODESTR_SETLENGTH}
Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
{
  Replaces S by a string of length l:
    - l<=0 gives the empty string
    - a nil S gives a new string from NewUnicodeString(l)
    - a shorter l gives the first l characters of S
    - a longer l gives S copied into a new character array of l elements
    - an unchanged length leaves the value of S as it is
}
  Var
    newValue: UnicodeString;
    buffer: array of widechar;
    curLen: SizeInt;
  begin
    if l<=0 then
      newValue:=''
    else if JLObject(S)=nil then
      { nothing to preserve, a completely new string is needed }
      newValue:=NewUnicodeString(l)
    else
      begin
        curLen:=length(s);
        if l<curLen then
          newValue:=JLString(s).substring(0,l)
        else if l>curLen then
          begin
            { copy the existing characters into a larger array }
            setlength(buffer,l);
            JLString(s).getChars(0,curLen,buffer,0);
            newValue:=JLString.create(buffer,0,l);
          end
        else
          { same length: Java strings are immutable, so no copy is needed }
          newValue:=s;
      end;
    S:=newValue;
  end;
- {*****************************************************************************
- Public functions, In interface.
- *****************************************************************************}
- {$define FPC_HAS_STRING_LEN_TO_WIDECHAR}
{ Converts Src to a widestring and copies it into Dest, truncated to
  DestSize-1 characters if necessary, followed by a terminating #0.
  Returns Dest. }
function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  var
    wide: widestring;
    count: SizeInt;
  begin
    wide:=src;
    count:=Length(wide);
    { leave room for the terminator }
    if count>=DestSize then
      count:=DestSize-1;
    JLString(wide).getChars(0,count,TJCharArray(Dest),0);
    Dest[count]:=#0;
    StringToWideChar:=Dest;
  end;
- {$define FPC_HAS_UNICODEFROMLOCALECHARS}
{ Decodes LocaleStrLen bytes of LocaleStr from CodePage.  When UnicodeStrLen
  is positive, the result is copied to UnicodeStr, truncated to
  UnicodeStrLen-1 characters if necessary and followed by #0.  Returns the
  number of characters (the truncated count when a copy took place).
  Flags is not used. }
function UnicodeFromLocaleChars(CodePage, Flags: Cardinal; LocaleStr: PAnsiChar;
  LocaleStrLen: Integer; UnicodeStr: PWideChar; UnicodeStrLen: Integer): Integer; overload;
  var
    decoded: widestring;
    count: SizeInt;
  begin
    widestringmanager.Ansi2WideMoveProc(LocaleStr,CodePage,decoded,LocaleStrLen);
    count:=Length(decoded);
    { only copy when the destination has room for at least the terminator }
    if UnicodeStrLen>0 then
      begin
        if count>=UnicodeStrLen then
          count:=UnicodeStrLen-1;
        JLString(decoded).getChars(0,count,TJCharArray(UnicodeStr),0);
        UniCodeStr[count]:=#0;
      end;
    UnicodeFromLocaleChars:=count;
  end;
- {$define FPC_HAS_UNICODECHAR_LEN_TO_STRING}
{ Returns a UnicodeString made of the first Len characters at S. }
function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  begin
    UnicodeCharLenToString:=JLString.Create(TJCharArray(S),0,len);
  end;
- {$define FPC_HAS_WIDECHAR_LEN_TO_STRING}
{ Returns a UnicodeString made of the first Len characters at S. }
function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  begin
    WideCharLenToString:=JLString.Create(TJCharArray(S),0,len);
  end;
- {$define FPC_HAS_UNICODESTR_UNIQUE}
{ Compiler helper for making a string unique: simply returns S unchanged. }
Function fpc_unicodestr_Unique(var S : JLObject): JLObject; compilerproc;
  begin
    fpc_unicodestr_Unique:=s;
  end;
- { the publicly accessible uniquestring function is declared as
- "external name 'FPC_UNICODESTR_UNIQUE'", which is normally an alias for
- the fpc_unicodestr_Unique compiler proc; since one is a function and the
- other a procedure that sort of hackery doesn't work for the JVM -> create
- a separate procedure for that (since Java strings are immutable, they are
- always unique though) }
{ Procedure counterpart of fpc_unicodestr_Unique; intentionally empty. }
procedure FPC_UNICODESTR_UNIQUE(var S : UnicodeString);
  begin
    { nothing to do }
  end;
- {$define FPC_HAS_UNICODESTR_COPY}
{ Returns up to Size characters of S starting at the 1-based position Index.
  An Index below 1 is treated as 1, Size is limited to what is available
  and a non-positive remaining size yields an empty string. }
Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
  var
    srcLen : SizeInt;
  begin
    srcLen:=Length(S);
    { switch to a 0-based start position, clamped at the beginning }
    dec(index);
    if Index < 0 then
      Index := 0;
    { Limit Size.  This also covers a zero-length S; both tests are needed
      because Size can be maxint, in which case Index+Size wraps around }
    if (Size>srcLen) or
       (Index+Size>srcLen) then
      Size:=srcLen-Index;
    if Size<=0 then
      Fpc_UnicodeStr_Copy:=''
    else
      Fpc_UnicodeStr_Copy:=JLString(s).subString(Index,Index+Size);
  end;
- {$define FPC_HAS_POS_UNICODESTR_UNICODESTR}
{ Returns the 1-based position of the first occurrence of Substr in Source
  at or after the 1-based position Offset, or 0 when there is none, Substr
  is empty or Offset lies outside Source. }
Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  begin
    result:=0;
    if Length(SubStr)<=0 then
      exit;
    if (Offset<=0) or (Offset>Length(Source)) then
      exit;
    { indexOf is 0-based and returns -1 when nothing is found }
    result:=JLString(Source).indexOf(SubStr,Offset-1)+1;
  end;
- { Faster version for a unicodechar alone }
- {$define FPC_HAS_POS_UNICODECHAR_UNICODESTR}
{ Returns the 1-based position of the first occurrence of c in s at or
  after the 1-based position Offset, or 0 when there is none or Offset lies
  outside s. }
Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  begin
    result:=0;
    if (Offset<=0) or (Offset>Length(s)) then
      exit;
    { indexOf is 0-based and returns -1 when nothing is found }
    result:=JLString(s).indexOf(ord(c),Offset-1)+1;
  end;
- { Faster version for a AnsiChar alone. Must be implemented because }
- { pos(c: AnsiChar; const s: shortstring) also exists, so otherwise }
- { using pos(AnsiChar,PAnsiChar) will always call the shortstring version }
- { (exact match for first argument), also with $h+ (JM) }
- {$define FPC_HAS_POS_CHAR_UNICODESTR}
{ AnsiChar overload: converts c to a unicodechar and defers to the
  UnicodeChar version of Pos. }
Function Pos (c : AnsiChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  var
    asUnicode : unicodechar;
  begin
    asUnicode:=c;
    Pos:=Pos(asUnicode,s,Offset);
  end;
- {$define FPC_HAS_DELETE_UNICODESTR}
{ Removes Size characters from S starting at the 1-based position Index.
  Nothing happens when Index lies outside S or Size is not positive; a Size
  reaching past the end of S removes everything from Index onwards. }
Procedure {$ifdef VER3_0}Delete{$else}fpc_unicodestr_delete{$endif} (Var S : UnicodeString; Index,Size: SizeInt);
  Var
    LS : SizeInt;
    sb: JLStringBuilder;
  begin
    LS:=Length(S);
    if (Index>LS) or (Index<=0) or (Size<=0) then
      exit;

    { (Size+Index) will overflow if Size=MaxInt, so clamp by comparing
      against the number of characters that follow Index }
    if Size>LS-Index then
      Size:=LS-Index+1;
    if Size<=LS-Index then
      begin
        { characters remain after the deleted range }
        Dec(Index);
        sb:=JLStringBuilder.Create(s);
        { StringBuilder.delete expects (start, exclusive end), not a count }
        sb.delete(index,index+size);
        s:=sb.toString;
      end
    else
      { deleting up to the end: keep only the part before Index }
      s:=JLString(s).substring(0,index-1);
  end;
- {$define FPC_HAS_INSERT_UNICODESTR}
{ Inserts Source into S in front of the 1-based position Index.  Index is
  clamped to the range 1..Length(S)+1; an empty Source leaves S untouched. }
Procedure {$ifdef VER3_0}Insert{$else}fpc_unicodestr_insert{$endif} (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  var
    curLen : SizeInt;
    builder : JLStringBuilder;
  begin
    if Length(Source)=0 then
      exit;
    { clamp the insertion point to the valid range }
    if index <= 0 then
      index := 1;
    curLen:=Length(S);
    if index > curLen then
      index := curLen+1;
    { StringBuilder offsets are 0-based }
    builder:=JLStringBuilder.Create(S);
    builder.insert(Index-1,Source);
    S:=builder.toString;
  end;
- {$define FPC_HAS_UPCASE_UNICODECHAR}
{ Returns c converted with Character.toUpperCase. }
Function UpCase(c:UnicodeChar):UnicodeChar;
  begin
    UpCase:=JLCharacter.toUpperCase(c);
  end;
- {$define FPC_HAS_UPCASE_UNICODESTR}
{ Returns s converted with String.toUpperCase. }
function UpCase(const s : UnicodeString) : UnicodeString;
  begin
    UpCase:=JLString(s).toUpperCase;
  end;
- {$define FPC_HAS_LOWERCASE_UNICODECHAR}
{ Returns c converted with Character.toLowerCase. }
Function LowerCase(c:UnicodeChar):UnicodeChar;
  begin
    LowerCase:=JLCharacter.toLowerCase(c);
  end;
- {$define FPC_HAS_LOWERCASE_UNICODESTR}
{ Returns s converted with String.toLowerCase. }
function LowerCase(const s : UnicodeString) : UnicodeString;
  begin
    LowerCase:=JLString(s).toLowerCase;
  end;
- {$define FPC_HAS_SETSTRING_UNICODESTR_PUNICODECHAR}
{ Sets S to the first Len characters at Buf, or to the empty string when
  Buf is nil or Len is not positive. }
Procedure fpc_setstring_unicodestr_pwidechar(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); compilerproc;
  begin
    if (not assigned(buf)) or (Len<=0) then
      s:=''
    else
      s:=JLString.Create(TJCharArray(Buf),0,Len);
  end;
- {$define FPC_HAS_UTF8ENCODE_UNICODESTRING}
{ Returns s encoded by UnicodeToUtf8.  The result is empty when s is empty
  or when UnicodeToUtf8 does not report a positive count. }
function UTF8Encode(const s : UnicodeString) : RawByteString;
  var
    written : SizeInt;
    buf : UTF8String;
    units: array of widechar;
  begin
    result:='';
    if s='' then
      exit;
    { reserve three bytes for every UTF-16 code unit of the input }
    SetLength(buf,length(s)*3);
    units:=JLString(s).toCharArray;
    written:=UnicodeToUtf8(PAnsiChar(buf),length(buf)+1,pwidechar(units),length(s));
    if written<=0 then
      exit;
    { NOTE(review): presumably the returned count includes the terminating
      #0, hence the -1 -- confirm against UnicodeToUtf8 }
    SetLength(buf,written-1);
    result:=buf;
  end;
- {$define FPC_HAS_UTF8DECODE_UNICODESTRING}
{ Returns s decoded by Utf8ToUnicode.  The result is empty when s is empty
  or when Utf8ToUnicode does not report a positive count. }
function UTF8Decode(const s : RawByteString): UnicodeString;
  var
    produced : SizeInt;
    units: array of widechar;
  begin
    result:='';
    if s='' then
      exit;
    { one code unit per input byte plus one extra element }
    SetLength(units,length(s)+1);
    produced:=Utf8ToUnicode(pwidechar(units),length(s)+1,PAnsiChar(s),length(s));
    if produced<=0 then
      exit;
    { NOTE(review): presumably the returned count includes the terminating
      #0, hence the -1 -- confirm against Utf8ToUnicode }
    result:=JLString.Create(units,0,produced-1);
  end;
- {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
- { stores a UTF-32 code point in the StringBuilder S at the 1-based position index (as one UTF-16 code unit or as a surrogate pair), growing S if needed, and advances index accordingly }
{ Stores the UTF-32 code point nc in S at the 1-based position index and
  advances index by the number of UTF-16 code units written: one for values
  up to $ffff, two (a surrogate pair) for values up to $10ffff, and one '?'
  for anything larger.  S is grown when it has no room left. }
procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: JLStringBuilder; var index: SizeInt);
  begin
    { if nc > $ffff, we need two places }
    if (index+ord(nc > $ffff)>s.length) then
      if (s.length < 10*256) then
        s.setLength(s.length+10)
      else
        s.setlength(s.length+s.length shr 8);
    { $ffff itself still fits in a single code unit and only one place has
      been reserved for it above, so it must take this branch too }
    if (nc<=$ffff) then
      begin
        s.setCharAt(index-1,unicodechar(nc));
        inc(index);
      end
    else if (dword(nc)<=$10ffff) then
      begin
        { high surrogate followed by low surrogate }
        s.setCharAt(index-1,unicodechar((nc - $10000) shr 10 + $d800));
        s.setCharAt(index,unicodechar((nc - $10000) and $3ff + $dc00));
        inc(index,2);
      end
    else
      { invalid code point }
      begin
        s.setCharAt(index-1,'?');
        inc(index);
      end;
  end;
{ Converts a UCS4String to a UnicodeString.  The last element of s is
  treated as the terminating #0 and is not converted; an empty s yields an
  empty string. }
function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  var
    i : SizeInt;
    resindex : SizeInt;
    tmpres: JLStringBuilder;
  begin
    { an empty array would request a StringBuilder with capacity -1, which
      raises an exception }
    if length(s)=0 then
      begin
        result:='';
        exit;
      end;
    { skip terminating #0 }
    tmpres:=JLStringBuilder.Create(length(s)-1);
    resindex:=1;
    for i:=0 to high(s)-1 do
      ConcatUTF32ToUnicodeStr(s[i],tmpres,resindex);
    { adjust result length (may be too big due to growing
      for surrogate pairs) }
    tmpres.setLength(resindex-1);
    result:=tmpres.toString;
  end;
- procedure UCS4Encode(p: PWideChar; len: sizeint; out res: UCS4String); forward;
- {$define FPC_HAS_UCS4STRING_TO_UNICODESTR}
{ Converts s to a UCS4String by handing its characters to UCS4Encode. }
function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  var
    units: array of widechar;
  begin
    units:=JLString(s).toCharArray;
    UCS4Encode(PWideChar(units),Length(s),result);
  end;
- {$define FPC_HAS_WIDESTR_TO_UCS4STRING}
{ Converts s to a UCS4String by handing its characters to UCS4Encode. }
function WideStringToUCS4String(const s : WideString) : UCS4String;
  var
    units: array of widechar;
  begin
    units:=JLString(s).toCharArray;
    UCS4Encode(PWideChar(units),Length(s),result);
  end;
- {$define FPC_HAS_UCS4STRING_TO_WIDESTR}
{ Converts a UCS4String to a WideString via UCS4StringToUnicodeString. }
function UCS4StringToWideString(const s : UCS4String) : WideString;
  begin
    UCS4StringToWideString:=UCS4StringToUnicodeString(s);
  end;
{ Returns the size of one UnicodeString element, sizeof(unicodechar); the
  argument itself is not inspected. }
function StringElementSize(const S : UnicodeString): Word;
  begin
    StringElementSize:=sizeof(unicodechar);
  end;
{ Returns 1 for an assigned string and 0 for a nil one. }
function StringRefCount(const S : UnicodeString): SizeInt;
  begin
    if not assigned(pointer(s)) then
      StringRefCount:=0
    else
      StringRefCount:=1;
  end;
{ Returns CP_UTF16BE for an assigned string and DefaultUnicodeCodePage for
  a nil one. }
function StringCodePage(const S : UnicodeString): TSystemCodePage;
  begin
    if not assigned(pointer(s)) then
      StringCodePage:=DefaultUnicodeCodePage
    else
      StringCodePage:=CP_UTF16BE;
  end;
- {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODESTRING}
{ Returns Str as an ansistring created with DefaultFileSystemCodePage. }
Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
  Begin
    ToSingleByteFileSystemEncodedFileName:=AnsiString(AnsistringClass.Create(Str,DefaultFileSystemCodePage));
  End;
- {$define FPC_HAS_TOSINGLEBYTEFILESYSTEMENCODEDFILENAME_UNICODECHARARRAY}
{ Returns the characters in arr as an ansistring created with
  DefaultFileSystemCodePage. }
Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
  Begin
    ToSingleByteFileSystemEncodedFileName:=AnsiString(AnsistringClass.Create(arr,DefaultFileSystemCodePage));
  End;
- { *************************************************************************** }
- { ************************* Collator threadvar ****************************** }
- { *************************************************************************** }
{ Supplies the initial collator value: a clone of Collator.getInstance. }
function TCollatorThreadVar.InitialValue: JLObject;
  begin
    { work on a copy, because the collator gets modified later on (e.g. its
      strength is set) }
    InitialValue:=JTCollator.getInstance.clone
  end;
- { *************************************************************************** }
- { ************************ Helpers for en/decode **************************** }
- { *************************************************************************** }
{ Returns the charset decoder (decoder=true) or encoder (decoder=false) that
  hm holds for code page cp.  When there is none yet, one is created for the
  charset named by win2javacs(cp) -- or for US-ASCII if that lookup raises
  an exception -- configured to use '?' as replacement and stored in hm. }
function GetOrInsertNewEnDecoder(hm: JUWeakHashMap; cp: TSystemCodePage; decoder: boolean): JLObject;
  var
    charset: JNCCharSet;
    questionMark: array[0..0] of jbyte;
  begin
    result:=hm.get(JLInteger.valueOf(cp));
    if assigned(result) then
      exit;
    try
      charset:=JNCCharSet.forName(win2javacs(cp));
    except
      { does not exist or not supported, fall back to ASCII like on other
        platforms }
      charset:=JNCCharset.forName('US-ASCII')
    end;
    if not decoder then
      begin
        result:=charset.newEncoder;
        questionMark[0]:=ord('?');
        JNCCharsetEncoder(result).replaceWith(questionMark);
      end
    else
      begin
        result:=charset.newDecoder;
        JNCCharsetDecoder(result).replaceWith('?');
      end;
    { remember it in the weak hashmap for (possible) later reuse }
    hm.put(JLInteger.Create(cp),result);
  end;
- { *************************************************************************** }
- { ************************** Decoder threadvar ****************************** }
- { *************************************************************************** }
{ Supplies the initial value: an empty weak hash map. }
function TCharsetDecoderThreadvar.InitialValue: JLObject;
  begin
    InitialValue:=JUWeakHashMap.Create;
  end;
{ Returns the decoder for code page cp from this threadvar's map, creating
  it through GetOrInsertNewEnDecoder when necessary. }
function TCharsetDecoderThreadvar.getForCodePage(cp: TSystemCodePage): JNCCharsetDecoder;
  var
    cache: JUWeakHashMap;
  begin
    cache:=JUWeakHashMap(get);
    getForCodePage:=JNCCharsetDecoder(GetOrInsertNewEnDecoder(cache,cp,true));
  end;
- { *************************************************************************** }
- { ************************** Encoder threadvar ****************************** }
- { *************************************************************************** }
{ Supplies the initial value: an empty weak hash map. }
function TCharsetEncoderThreadvar.InitialValue: JLObject;
  begin
    InitialValue:=JUWeakHashMap.Create;
  end;
{ Returns the encoder for code page cp from this threadvar's map, creating
  it through GetOrInsertNewEnDecoder when necessary. }
function TCharsetEncoderThreadvar.getForCodePage(cp: TSystemCodePage): JNCCharsetEncoder;
  var
    cache: JUWeakHashMap;
  begin
    cache:=JUWeakHashMap(get);
    getForCodePage:=JNCCharsetEncoder(GetOrInsertNewEnDecoder(cache,cp,false));
  end;
- { *************************************************************************** }
- { ************************ TUnicodeStringManager **************************** }
- { *************************************************************************** }
- { Class constructor: creates the collator/decoder/encoder threadvars and
-   initialises the default code page variables from the JVM's default
-   charset. }
- class constructor TUnicodeStringManager.ClassCreate;
- const
-   { value that marks an unknown/unsupported charset in the check below }
-   cpUnknown = 65535;
-   { code page used as the ASCII fallback }
-   cpUsAscii = 20127;
- begin
-   collator:=TCollatorThreadVar.Create;
-   decoder:=TCharsetDecoderThreadVar.Create;
-   encoder:=TCharsetEncoderThreadVar.Create;
-
-   DefaultSystemCodePage:=javacs2win(JNCCharset.defaultCharset.name);
-   { unknown/unsupported -> fall back to ASCII; this code page is used to
-     parse stdin etc, so picking utf-8 or similar would not help }
-   if DefaultSystemCodePage=cpUnknown then
-     DefaultSystemCodePage:=cpUsAscii;
-
-   { the file system code pages follow the system code page }
-   DefaultFileSystemCodePage:=DefaultSystemCodePage;
-   DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage;
-   DefaultUnicodeCodePage:=CP_UTF16BE;
- end;
- { Converts len wide characters at source into dest using code page cp.
-   All work is forwarded, arguments unchanged, to DefaultUnicode2AnsiMove. }
- procedure TUnicodeStringManager.Wide2AnsiMoveProc(source:pwidechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
- begin
-   DefaultUnicode2AnsiMove(source, dest, cp, len);
- end;
- { Converts len ansi characters at source (interpreted in code page cp) into
-   the wide string dest. All work is forwarded, arguments unchanged, to
-   DefaultAnsi2UnicodeMove. }
- procedure TUnicodeStringManager.Ansi2WideMoveProc(source:PAnsiChar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
- begin
-   DefaultAnsi2UnicodeMove(source, cp, dest, len);
- end;
- { Returns S converted by upcase. }
- function TUnicodeStringManager.UpperWideStringProc(const S: WideString): WideString;
- begin
-   UpperWideStringProc:=upcase(S);
- end;
- { Returns S converted by lowercase. }
- function TUnicodeStringManager.LowerWideStringProc(const S: WideString): WideString;
- begin
-   LowerWideStringProc:=lowercase(S);
- end;
- { Compares s1 and s2 with the collator held by the collator threadvar, after
-   setting its strength to JTCollator.IDENTICAL. Returns the value produced by
-   the collator's compare method. }
- function TUnicodeStringManager.CompareWideStringProc(const s1, s2 : WideString) : PtrInt;
- var
-   coll: JTCollator;
- begin
-   coll:=JTCollator(collator.get);
-   { the collator object is shared with CompareTextWideStringProc, which uses
-     a different strength, so the strength is set on every call }
-   coll.setStrength(JTCollator.IDENTICAL);
-   CompareWideStringProc:=coll.compare(s1,s2);
- end;
- { Compares s1 and s2 with the collator held by the collator threadvar, after
-   setting its strength to JTCollator.TERTIARY. Returns the value produced by
-   the collator's compare method. }
- function TUnicodeStringManager.CompareTextWideStringProc(const s1, s2 : WideString): PtrInt;
- var
-   coll: JTCollator;
- begin
-   coll:=JTCollator(collator.get);
-   { the collator object is shared with CompareWideStringProc, which uses a
-     different strength, so the strength is set on every call }
-   coll.setStrength(JTCollator.TERTIARY);
-   CompareTextWideStringProc:=coll.compare(s1,s2);
- end;
- { Returns the length of the character buffer obtained by decoding the bytes
-   of Str from position Index up to length(Str).
-
-   Malformed and unmappable input is replaced instead of reported, so bad
-   data does not abort the decoding.
-
-   Str carries no code page of its own, so the decoder for
-   DefaultSystemCodePage is used. }
- function TUnicodeStringManager.CharLengthPCharProc(const Str: PAnsiChar; Index: PtrInt): PtrInt;
- var
-   localdecoder: JNCCharsetDecoder;
- begin
-   { the decoder threadvar holds a weak hash map of decoders (see
-     TCharsetDecoderThreadvar.InitialValue), not a decoder, so its value
-     cannot be cast to JNCCharsetDecoder; ask it for the decoder of the
-     default system code page instead }
-   localdecoder:=decoder.getForCodePage(DefaultSystemCodePage);
-   localdecoder.reset;
-   localdecoder.onMalformedInput(JNCCodingErrorAction.fREPLACE);
-   localdecoder.onUnmappableCharacter(JNCCodingErrorAction.fREPLACE);
-   { wrap(array,offset,length): decode everything from Index to the end }
-   result:=localdecoder.decode(JNByteBuffer.wrap(TJByteArray(Str),Index,length(Str)-Index)).length;
- end;
- { Determines how many bytes of Str, starting at position Index, are needed
-   to decode successfully, looking at no more than MaxLookAhead bytes.
-
-   Returns:
-     0   when no bytes are available (end of Str reached or MaxLookAhead<=0)
-     n>0 the smallest number of bytes for which decoding reported no error
-     -1  when no prefix of the inspected bytes decodes without error
-
-   Str carries no code page of its own, so the decoder for
-   DefaultSystemCodePage is used. }
- function TUnicodeStringManager.CodePointLengthProc(const Str: PAnsiChar; Index, MaxLookAhead: PtrInt): Ptrint;
- var
-   localdecoder: JNCCharsetDecoder;
-   inbuf: JNByteBuffer;
-   outbuf: JNCharBuffer;
-   coderres: JNCCoderResult;
-   limit, maxlimit: longint;
- begin
-   { the decoder threadvar holds a weak hash map of decoders (see
-     TCharsetDecoderThreadvar.InitialValue), not a decoder, so its value
-     cannot be cast to JNCCharsetDecoder; ask it for the decoder of the
-     default system code page instead }
-   localdecoder:=decoder.getForCodePage(DefaultSystemCodePage);
-   localdecoder.reset;
-   { errors must be reported (not replaced) so that an incomplete sequence
-     can be told apart from a complete one }
-   localdecoder.onMalformedInput(JNCCodingErrorAction.fREPORT);
-   localdecoder.onUnmappableCharacter(JNCCodingErrorAction.fREPORT);
-   limit:=0;
-   maxlimit:=min(length(Str)-Index,MaxLookAhead);
-   { end of PAnsiChar (or nothing may be inspected)? }
-   if maxlimit<=0 then
-     begin
-       result:=0;
-       exit;
-     end;
-   { wrap(array,offset,length): the third argument is a byte count, not an
-     end position }
-   inbuf:=JNByteBuffer.wrap(TJByteArray(Str),Index,maxlimit);
-   { we will get at most 2 output characters (when decoding from UTF-32 to
-     UTF-16) }
-   outbuf:=JNCharBuffer.allocate(2);
-   { keep trying to decode with one more byte until decoding succeeds or all
-     available bytes have been tried }
-   repeat
-     inc(limit);
-     { the buffer limit is an absolute position in the array, so it has to
-       include the start offset }
-     inbuf.limit(Index+limit);
-     coderres:=localdecoder.decode(inbuf,outbuf,true);
-   until not coderres.isError or
-         (limit=maxlimit);
-   if not coderres.isError then
-     result:=limit
-   else
-     result:=-1;
- end;
- { Upper-cases an ansistring by delegating to UpperWideStringProc; the
-   argument and the result pass through a WideString on the way. }
- function TUnicodeStringManager.UpperAnsiStringProc(const s : ansistring) : ansistring;
- begin
-   UpperAnsiStringProc:=UpperWideStringProc(s);
- end;
- { Lower-cases an ansistring by delegating to LowerWideStringProc; the
-   argument and the result pass through a WideString on the way. }
- function TUnicodeStringManager.LowerAnsiStringProc(const s : ansistring) : ansistring;
- begin
-   LowerAnsiStringProc:=LowerWideStringProc(s);
- end;
- { Compares two ansistrings by delegating to CompareUnicodeStringProc and
-   returning its result. }
- function TUnicodeStringManager.CompareStrAnsiStringProc(const S1, S2: ansistring): PtrInt;
- begin
-   CompareStrAnsiStringProc:=CompareUnicodeStringProc(S1,S2);
- end;
- { Compares two ansistrings by delegating to CompareTextUnicodeStringProc and
-   returning its result. }
- function TUnicodeStringManager.CompareTextAnsiStringProc(const S1, S2: ansistring): PtrInt;
- begin
-   CompareTextAnsiStringProc:=CompareTextUnicodeStringProc(S1,S2);
- end;
- { Compares two PAnsiChar strings: each one is turned into a unicodestring
-   via JLString.Create over its full length, and the pair is then compared
-   with CompareUnicodeStringProc.
-   NOTE(review): the PAnsiChar arguments are cast to TJCharArray before being
-   passed to JLString.Create - confirm this cast is valid for ansichar data. }
- function TUnicodeStringManager.StrCompAnsiStringProc(S1, S2: PAnsiChar): PtrInt;
- var
-   lhs, rhs: unicodestring;
- begin
-   lhs:=JLString.Create(TJCharArray(S1),0,length(S1));
-   rhs:=JLString.Create(TJCharArray(S2),0,length(S2));
-   StrCompAnsiStringProc:=CompareUnicodeStringProc(lhs,rhs);
- end;
- { Compares two PAnsiChar strings: each one is turned into a unicodestring
-   via JLString.Create over its full length, and the pair is then compared
-   with CompareTextUnicodeStringProc.
-   NOTE(review): the PAnsiChar arguments are cast to TJCharArray before being
-   passed to JLString.Create - confirm this cast is valid for ansichar data. }
- function TUnicodeStringManager.StrICompAnsiStringProc(S1, S2: PAnsiChar): PtrInt;
- var
-   lhs, rhs: unicodestring;
- begin
-   lhs:=JLString.Create(TJCharArray(S1),0,length(S1));
-   rhs:=JLString.Create(TJCharArray(S2),0,length(S2));
-   StrICompAnsiStringProc:=CompareTextUnicodeStringProc(lhs,rhs);
- end;
- { Compares at most MaxLen leading elements of two PAnsiChar strings: each
-   one is turned into a unicodestring via JLString.Create, limited to
-   min(length, MaxLen) elements, and the pair is then compared with
-   CompareUnicodeStringProc.
-   NOTE(review): the PAnsiChar arguments are cast to TJCharArray before being
-   passed to JLString.Create - confirm this cast is valid for ansichar data. }
- function TUnicodeStringManager.StrLCompAnsiStringProc(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
- var
-   lhs, rhs: unicodestring;
- begin
-   lhs:=JLString.Create(TJCharArray(S1),0,min(length(S1),MaxLen));
-   rhs:=JLString.Create(TJCharArray(S2),0,min(length(S2),MaxLen));
-   StrLCompAnsiStringProc:=CompareUnicodeStringProc(lhs,rhs);
- end;
- { Compares at most MaxLen leading elements of two PAnsiChar strings: each
-   one is turned into a unicodestring via JLString.Create, limited to
-   min(length, MaxLen) elements, and the pair is then compared with
-   CompareTextUnicodeStringProc.
-   NOTE(review): the PAnsiChar arguments are cast to TJCharArray before being
-   passed to JLString.Create - confirm this cast is valid for ansichar data. }
- function TUnicodeStringManager.StrLICompAnsiStringProc(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
- var
-   lhs, rhs: unicodestring;
- begin
-   lhs:=JLString.Create(TJCharArray(S1),0,min(length(S1),MaxLen));
-   rhs:=JLString.Create(TJCharArray(S2),0,min(length(S2),MaxLen));
-   StrLICompAnsiStringProc:=CompareTextUnicodeStringProc(lhs,rhs);
- end;
- { Lower-cases the contents of Str: a unicodestring is built from Str via
-   JLString.Create, passed through LowerWideStringProc, converted to an
-   ansistring, and the fdata field of that ansistring is returned as
-   PAnsiChar.
-   NOTE(review): the returned pointer comes from the newly created
-   ansistring; nothing here writes to Str itself - confirm callers expect
-   that. }
- function TUnicodeStringManager.StrLowerAnsiStringProc(Str: PAnsiChar): PAnsiChar;
- var
-   wide: unicodestring;
-   lowered: ansistring;
- begin
-   wide:=JLString.Create(TJCharArray(Str),0,length(Str));
-   lowered:=ansistring(LowerWideStringProc(wide));
-   result:=PAnsiChar(AnsiStringClass(lowered).fdata);
- end;
- { Upper-cases the contents of Str: a unicodestring is built from Str via
-   JLString.Create, passed through UpperWideStringProc, converted to an
-   ansistring, and the fdata field of that ansistring is returned as
-   PAnsiChar.
-   NOTE(review): the returned pointer comes from the newly created
-   ansistring; nothing here writes to Str itself - confirm callers expect
-   that. }
- function TUnicodeStringManager.StrUpperAnsiStringProc(Str: PAnsiChar): PAnsiChar;
- var
-   wide: unicodestring;
-   raised: ansistring;
- begin
-   wide:=JLString.Create(TJCharArray(Str),0,length(Str));
-   raised:=ansistring(UpperWideStringProc(wide));
-   result:=PAnsiChar(AnsiStringClass(raised).fdata);
- end;
- { Converts len unicode characters at source into dest using code page cp.
-   All work is forwarded, arguments unchanged, to DefaultUnicode2AnsiMove. }
- procedure TUnicodeStringManager.Unicode2AnsiMoveProc(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
- begin
-   DefaultUnicode2AnsiMove(source, dest, cp, len);
- end;
- { Converts len ansi characters at source (interpreted in code page cp) into
-   the unicode string dest. All work is forwarded, arguments unchanged, to
-   DefaultAnsi2UnicodeMove. }
- procedure TUnicodeStringManager.Ansi2UnicodeMoveProc(source:PAnsiChar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
- begin
-   DefaultAnsi2UnicodeMove(source, cp, dest, len);
- end;
- { Upper-cases a UnicodeString by delegating to UpperWideStringProc. }
- function TUnicodeStringManager.UpperUnicodeStringProc(const S: UnicodeString): UnicodeString;
- begin
-   UpperUnicodeStringProc:=UpperWideStringProc(S);
- end;
- { Lower-cases a UnicodeString by delegating to LowerWideStringProc. }
- function TUnicodeStringManager.LowerUnicodeStringProc(const S: UnicodeString): UnicodeString;
- begin
-   LowerUnicodeStringProc:=LowerWideStringProc(S);
- end;
- { Compares two UnicodeStrings by delegating to CompareWideStringProc and
-   returning its result. }
- function TUnicodeStringManager.CompareUnicodeStringProc(const s1, s2 : UnicodeString) : PtrInt;
- begin
-   CompareUnicodeStringProc:=CompareWideStringProc(s1,s2);
- end;
- { Compares two UnicodeStrings by delegating to CompareTextWideStringProc and
-   returning its result. }
- function TUnicodeStringManager.CompareTextUnicodeStringProc(const s1, s2 : UnicodeString): PtrInt;
- begin
-   CompareTextUnicodeStringProc:=CompareTextWideStringProc(s1,s2);
- end;
- { Installs a newly created TUnicodeStringManager instance as the global
-   widestringmanager. }
- procedure initunicodestringmanager;
- begin
-   widestringmanager:=TUnicodeStringManager.Create;
- end;
|