12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 1999-2005 by Florian Klaempfl,
- member of the Free Pascal development team.
- This file implements support routines for WideStrings with FPC
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {
- This file contains the implementation of the WideString type,
- and all things that are needed for it.
- WideString is defined as a 'silent' pwidechar :
- a pwidechar that points to :
- @-8 : SizeInt for reference count (historical; the TWideRec record declared
- below has no such field and FPC_WIDESTR_INCR_REF copies the string instead);
- @-4 : DWord for size; size=number of bytes, not the number of chars. Divide or multiply
- with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
- Windows COM BSTR.
- @ : String + Terminating #0;
- Pwidechar(Widestring) is a valid typecast.
- So WS[i] is converted to the address @WS+i-1.
- Constants should be assigned a reference count of -1
- Meaning that they can't be disposed of.
- }
- Type
- PWideRec = ^TWideRec;
- TWideRec = Packed Record // header that precedes the character data of a heap widestring
- Len : DWord; // payload size in bytes (NewWideString stores character count * 2)
- First : WideChar; // first character slot; a widestring pointer points at this field
- end;
- Const
- WideRecLen = SizeOf(TWideRec); // header size including one WideChar slot; added to Len*sizeof(WideChar) when allocating
- WideFirstOff = SizeOf(TWideRec)-sizeof(WideChar); // distance from the start of the record to First
- {
- Default WideChar <-> Char conversion only converts code units below
- 256 (one byte per character); all other wide characters are translated to '?'.
- These routines can be overwritten for the Current Locale
- }
- procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;cp : TSystemCodePage;len:SizeInt);
- { Fallback wide -> single-byte conversion.
-   dest is resized to len; every code unit below 256 is stored as the
-   character with the same ordinal value, anything else becomes '?'.
-   The cp argument is not consulted by this routine. }
- var
-   remaining : SizeInt;
-   outp      : PChar;
- begin
-   setlength(dest,len);
-   outp:=PChar(Pointer(dest));
-   remaining:=len;
-   while remaining>0 do
-     begin
-       if word(source^)>=256 then
-         outp^:='?'
-       else
-         outp^:=char(word(source^));
-       inc(outp);
-       inc(source);
-       dec(remaining);
-     end;
- end;
- procedure DefaultAnsi2WideMove(source:pchar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
- { Fallback single-byte -> wide conversion.
-   dest is resized to len and each source byte is widened to the
-   WideChar with the same ordinal value. The cp argument is not consulted. }
- var
-   idx : SizeInt;
- begin
-   setlength(dest,len);
-   idx:=0;
-   while idx<len do
-     begin
-       inc(idx);
-       dest[idx]:=widechar(byte(source^));
-       inc(source);
-     end;
- end;
- {****************************************************************************
- Internal functions, not in interface.
- ****************************************************************************}
- procedure WideStringError; // reports run-time error 204 through HandleErrorFrame, passing the current frame
- begin
- HandleErrorFrame(204,get_frame);
- end;
- {$ifdef WideStrDebug}
- Procedure DumpWideRec(S : Pointer);
- { Debug helper (only compiled with WideStrDebug): prints the byte length
-   stored in the header in front of widestring S, or a notice when S is nil.
-   TWideRec declares no reference count field, so only Len is printed. }
- begin
-   If S=Nil then
-     Writeln ('String is nil')
-   Else
-     With PWideRec(S-WideFirstOff)^ do
-       Writeln ('(Len:',len,')');
- end;
- {$endif}
- Function NewWideString(Len : SizeInt) : Pointer;
- {
- Allocate a new WideString with room for Len characters and return a
- pointer to its first character.
- With winwidestringalloc (MSWINDOWS only) the block comes from
- SysAllocStringLen; otherwise it comes from GetMem, the header length
- is set to Len*2 bytes and the first character is set to #0.
- WideStringError is called when the allocation yields nil.
- }
- Var
- P : Pointer;
- begin
- {$ifdef MSWINDOWS}
- if winwidestringalloc then
- begin
- P:=SysAllocStringLen(nil,Len);
- if P=nil then
- WideStringError;
- end
- else
- {$endif MSWINDOWS}
- begin
- GetMem(P,Len*sizeof(WideChar)+WideRecLen);
- If P<>Nil then
- begin
- PWideRec(P)^.Len:=Len*2; { Initial length }
- PWideRec(P)^.First:=#0; { Terminating #0 }
- inc(p,WideFirstOff); { Points to string now }
- end
- else
- WideStringError;
- end;
- NewWideString:=P;
- end;
- Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
- {
- Releases the widestring S and sets S to nil; does nothing when S is nil.
- Despite the name no reference count is consulted here: the string is
- always freed, with SysFreeString when winwidestringalloc is set
- (MSWINDOWS only) and otherwise with Freemem on the start of the block.
- }
- Begin
- If S=Nil then
- exit;
- {$ifdef MSWINDOWS}
- if winwidestringalloc then
- SysFreeString(S)
- else
- {$endif MSWINDOWS}
- begin
- Dec (S,WideFirstOff); // step back from the first character to the start of the allocated block
- Freemem(S);
- end;
- S:=Nil;
- end;
- { alias for internal use }
- Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
- Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
- { "Incrementing the reference" of a widestring is done by cloning it:
-   a non-nil S is replaced by a freshly allocated copy of its contents.
-   A nil S is left untouched. }
- var
-   clone : pointer;
-   chars : SizeInt;
- begin
-   if S<>Nil then
-     begin
-       chars:=length(WideString(S));
-       clone:=NewWidestring(chars);
-       move(S^,clone^,(chars+1)*sizeof(widechar)); // terminating #0 too
-       S:=clone;
-     end;
- end;
- { alias for internal use }
- Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
- function fpc_WideStr_To_ShortStr (high_of_res: SizeInt;const S2 : WideString): shortstring;[Public, alias: 'FPC_WIDESTR_TO_SHORTSTR']; compilerproc;
- {
-   Converts a WideString to a ShortString.
-   At most high_of_res characters of S2 are converted; an empty S2 gives ''.
-   The conversion goes through the installed Wide2AnsiMoveProc using
-   DefaultSystemCodePage, like the procedure form of this routine.
- }
- Var
-   Size : SizeInt;
-   temp : ansistring;
- begin
-   result:='';
-   Size:=Length(S2);
-   if Size>0 then
-     begin
-       If Size>high_of_res then
-         Size:=high_of_res;
-       { the move procedure takes (source, dest, codepage, len) }
-       widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),temp,DefaultSystemCodePage,Size);
-       result:=temp;
-     end;
- end;
- {$else FPC_STRTOSHORTSTRINGPROC}
- procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
- {
-   Converts a WideString to a ShortString.
-   res receives at most high(res) converted characters; an empty S2
-   leaves res empty.
- }
- Var
-   count  : SizeInt;
-   narrow : ansistring;
- begin
-   res:='';
-   count:=Length(S2);
-   if count<=0 then
-     exit;
-   { never convert more than the destination can hold }
-   if count>high(res) then
-     count:=high(res);
-   widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),narrow,DefaultSystemCodePage,count);
-   res:=narrow;
- end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
- Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
- {
-   Converts a ShortString to a WideString through the installed
-   Ansi2WideMoveProc, using DefaultSystemCodePage. An empty S2 gives ''.
- }
- Var
-   count : SizeInt;
- begin
-   result:='';
-   count:=Length(S2);
-   if count<=0 then
-     exit;
-   widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),DefaultSystemCodePage,result,count);
- end;
- Function fpc_WideStr_To_AnsiStr (const S2 : WideString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
- {
- Converts a WideString to an AnsiString through the installed
- Wide2AnsiMoveProc. An empty S2 gives ''.
- cp is a parameter when FPC_HAS_CPSTRING is defined; otherwise it is a
- local that is set to DefaultSystemCodePage.
- }
- Var
- Size : SizeInt;
- {$ifndef FPC_HAS_CPSTRING}
- cp : TSystemCodePage;
- {$endif FPC_HAS_CPSTRING}
- begin
- {$ifndef FPC_HAS_CPSTRING}
- cp:=DefaultSystemCodePage;
- {$endif FPC_HAS_CPSTRING}
- result:='';
- Size:=Length(S2);
- if Size>0 then
- begin
- if (cp=CP_NONE) or (cp=0) then // no specific code page requested: use the default one
- cp:=DefaultSystemCodePage;
- widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,cp,Size);
- end;
- end;
- Function fpc_AnsiStr_To_WideStr (Const S2 : RawByteString): WideString; compilerproc;
- {
- Converts an AnsiString to a WideString through the installed
- Ansi2WideMoveProc. An empty S2 gives ''.
- The source code page is taken from StringCodePage(S2).
- }
- Var
- Size : SizeInt;
- cp: TSystemCodePage;
- begin
- result:='';
- Size:=Length(S2);
- if Size>0 then
- begin
- cp:=StringCodePage(S2);
- if (cp=CP_NONE) or (cp=0) then // string carries no usable code page: use the default one
- cp:=DefaultSystemCodePage;
- widestringmanager.Ansi2WideMoveProc(PChar(S2),cp,result,Size);
- end;
- end;
- Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
- { Builds a WideString from the zero-terminated wide character buffer p.
-   A nil p gives ''. The length is the index of the first zero word. }
- var
-   count : SizeInt;
- begin
-   result:='';
-   if p<>nil then
-     begin
-       count:=IndexWord(p^, -1, 0);
-       Setlength(result,count); // zero-terminates
-       if count>0 then
-         Move(p^,PWideChar(Pointer(result))^,count*sizeof(WideChar));
-     end;
- end;
- { checked against the ansistring routine, 2001-05-27 (FK) }
- Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
- {
- Assigns S2 to S1 (S1:=S2) by copying the characters of S2 into S1.
- Nothing happens when both pointers are equal; a nil S2 frees S1 and
- sets it to nil.
- }
- begin
- if S1=S2 then exit;
- if S2<>nil then
- begin
- {$ifdef MSWINDOWS}
- if winwidestringalloc then
- begin
- if SysReAllocStringLen(S1, S2, Length(WideString(S2))) = 0 then // a zero result is treated as failure
- WideStringError;
- end
- else
- {$endif MSWINDOWS}
- begin
- SetLength(WideString(S1),length(WideString(S2)));
- move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar)); // +1 also copies the terminating #0
- end;
- end
- else
- begin
- { Free S1 }
- fpc_widestr_decr_ref (S1);
- S1:=nil;
- end;
- end;
- { alias for internal use }
- Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
- {$ifndef STR_CONCAT_PROCS}
- function fpc_WideStr_Concat (const S1,S2 : WideString): WideString; compilerproc; // returns S1 followed by S2
- Var
- Size,Location : SizeInt;
- pc : pwidechar;
- begin
- { only assign if s1 or s2 is empty }
- if (S1='') then
- begin
- result:=s2;
- exit;
- end;
- if (S2='') then
- begin
- result:=s1;
- exit;
- end;
- Location:=Length(S1); // number of characters contributed by S1; S2 starts at this offset
- Size:=length(S2);
- SetLength(result,Size+Location);
- pc:=pwidechar(result);
- Move(S1[1],pc^,Location*sizeof(WideChar));
- inc(pc,location);
- Move(S2[1],pc^,(Size+1)*sizeof(WideChar)); // +1 also copies the terminating #0 of S2
- end;
- function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring; compilerproc; // returns all elements of sarr joined in order
- Var
- i : Longint;
- p : pointer;
- pc : pwidechar;
- Size,NewSize : SizeInt;
- begin
- { First calculate size of the result so we can do
- a single call to SetLength() }
- NewSize:=0;
- for i:=low(sarr) to high(sarr) do
- inc(Newsize,length(sarr[i]));
- SetLength(result,NewSize);
- pc:=pwidechar(result);
- for i:=low(sarr) to high(sarr) do
- begin
- p:=pointer(sarr[i]);
- if assigned(p) then // empty elements are nil pointers and contribute nothing
- begin
- Size:=length(widestring(p));
- Move(pwidechar(p)^,pc^,(Size+1)*sizeof(WideChar)); // +1 also copies the element's terminating #0
- inc(pc,size); // pc is a pwidechar, so this advances by Size characters
- end;
- end;
- end;
- {$else STR_CONCAT_PROCS}
- procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc; // stores S1 followed by S2 into DestS; DestS may share its data pointer with S1 and/or S2
- Var
- Size,Location : SizeInt;
- same : boolean;
- begin
- { only assign if s1 or s2 is empty }
- if (S1='') then
- begin
- DestS:=s2;
- exit;
- end;
- if (S2='') then
- begin
- DestS:=s1;
- exit;
- end;
- Location:=Length(S1);
- Size:=length(S2);
- { Use Pointer() typecasts to prevent extra conversion code }
- if Pointer(DestS)=Pointer(S1) then
- begin
- same:=Pointer(S1)=Pointer(S2); // remembered before SetLength, which may change Pointer(DestS)
- SetLength(DestS,Size+Location);
- if same then
- Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size)*sizeof(WideChar)) // S2 is the same string: duplicate the first half read through DestS
- else
- Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
- end
- else if Pointer(DestS)=Pointer(S2) then
- begin
- SetLength(DestS,Size+Location);
- Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar)); // shift the old contents up first to make room for S1
- Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(WideChar));
- end
- else
- begin
- DestS:='';
- SetLength(DestS,Size+Location);
- Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(WideChar));
- Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
- end;
- end;
- procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
- {
-   Stores the concatenation of all strings in sarr, in order, into DestS.
-   The result is built in the local DestTmp and assigned to DestS at the
-   end. An array holding a single string yields that string.
- }
- Var
-   i : Longint;
-   p,pc : pointer;
-   Size,NewLen : SizeInt;
-   DestTmp : Widestring;
- begin
-   { high(sarr)=0 means exactly one element: the result is that element,
-     not the empty string }
-   if high(sarr)=0 then
-     begin
-       DestS:=sarr[0];
-       exit;
-     end;
-   { First calculate size of the result so we can do
-     a single call to SetLength() }
-   NewLen:=0;
-   for i:=low(sarr) to high(sarr) do
-     inc(NewLen,length(sarr[i]));
-   SetLength(DestTmp,NewLen);
-   pc:=pwidechar(DestTmp);
-   for i:=low(sarr) to high(sarr) do
-     begin
-       p:=pointer(sarr[i]);
-       { empty elements are nil pointers and contribute nothing }
-       if assigned(p) then
-         begin
-           Size:=length(widestring(p));
-           { +1 also copies the element's terminating #0 }
-           Move(p^,pc^,(Size+1)*sizeof(WideChar));
-           { pc is an untyped pointer here, so advance in bytes }
-           inc(pc,size*sizeof(WideChar));
-         end;
-     end;
-   DestS:=DestTmp;
- end;
- {$endif STR_CONCAT_PROCS}
- Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
- {
-   Produces a one-character WideString whose only character is c.
- }
- begin
-   Setlength(result,1);
-   result[1]:=c;
- end;
- Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
- {
-   Produces a one-character WideString whose only character is c.
- }
- begin
-   Setlength(result,1);
-   result[1]:=c;
- end;
- Function fpc_WChar_To_AnsiStr(const c : WideChar{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}): AnsiString; compilerproc;
- {
- Converts a WideChar to a AnsiString by handing a single character to
- the installed Wide2AnsiMoveProc.
- The code page passed on is cp when FPC_HAS_CPSTRING is defined and
- TSystemCodePage(0) otherwise.
- }
- begin
- widestringmanager.Wide2AnsiMoveProc(@c, fpc_WChar_To_AnsiStr,{$ifdef FPC_HAS_CPSTRING}cp{$else}TSystemCodePage(0){$endif FPC_HAS_CPSTRING}, 1);
- end;
- Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
- {
-   Produces a one-character WideString whose only character is c.
- }
- begin
-   Setlength(result,1);
-   result[1]:=c;
- end;
- Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
- { Converts the zero-terminated buffer p to a WideString through the
-   installed Ansi2WideMoveProc with DefaultSystemCodePage.
-   A nil pointer or a buffer starting with #0 gives ''. }
- Var
-   count : SizeInt;
- begin
-   if assigned(p) and (p[0]<>#0) then
-     begin
-       count:=IndexChar(p^,-1,#0);
-       widestringmanager.Ansi2WideMoveProc(P,DefaultSystemCodePage,fpc_PChar_To_WideStr,count);
-     end
-   else
-     fpc_pchar_to_widestr := '';
- end;
- Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc; // converts a char array to a WideString via Ansi2WideMoveProc with DefaultSystemCodePage
- var
- i : SizeInt;
- begin
- if (zerobased) then
- begin
- if (arr[0]=#0) Then
- begin
- fpc_chararray_to_widestr := '';
- exit;
- end;
- i:=IndexChar(arr,high(arr)+1,#0); // length up to the first #0 inside the array
- if i = -1 then // no #0 found: use the whole array
- i := high(arr)+1;
- end
- else
- i := high(arr)+1; // not zerobased: the whole array is converted, no search for #0
- widestringmanager.Ansi2WideMoveProc(pchar(@arr),DefaultSystemCodePage,fpc_CharArray_To_WideStr,i);
- end;
- {$ifndef FPC_STRTOCHARARRAYPROC}
- { inside the compiler, the resulttype is modified to that of the actual }
- { chararray we're converting to (JM) }
- function fpc_widestr_to_chararray(arraysize: SizeInt; const src: WideString): fpc_big_chararray;[public,alias: 'FPC_WIDESTR_TO_CHARARRAY']; compilerproc;
- {
-   Converts src with the installed Wide2AnsiMoveProc (DefaultSystemCodePage)
-   and copies at most arraysize bytes of the converted text into the
-   returned array; the rest of the array is filled with zero bytes.
- }
- var
-   len: SizeInt;
-   temp: ansistring;
- begin
-   len := length(src);
-   { make sure we don't dereference src if it can be nil (JM) }
-   if len > 0 then
-     { the move procedure takes (source, dest, codepage, len) }
-     widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,DefaultSystemCodePage,len);
-   len := length(temp);
-   if len > arraysize then
-     len := arraysize;
- {$push}
- {$r-}
-   move(temp[1],fpc_widestr_to_chararray[0],len);
-   fillchar(fpc_widestr_to_chararray[len],arraysize-len,0);
- {$pop}
- end;
- { inside the compiler, the resulttype is modified to that of the actual }
- { widechararray we're converting to (JM) }
- function fpc_widestr_to_widechararray(arraysize: SizeInt; const src: WideString): fpc_big_widechararray;[public,alias: 'FPC_WIDESTR_TO_WIDECHARARRAY']; compilerproc; // copies at most arraysize characters of src into the result and zero-fills the rest
- var
- len: SizeInt;
- begin
- len := length(src);
- if len > arraysize then
- len := arraysize;
- {$push}
- {$r-}
- { make sure we don't try to access element 1 of the widestring if it's nil }
- if len > 0 then
- move(src[1],fpc_widestr_to_widechararray[0],len*SizeOf(WideChar));
- fillchar(fpc_widestr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
- {$pop}
- end;
- { inside the compiler, the resulttype is modified to that of the actual }
- { chararray we're converting to (JM) }
- function fpc_ansistr_to_widechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_widechararray;[public,alias: 'FPC_ANSISTR_TO_WIDECHARARRAY']; compilerproc;
- {
-   Converts src with the installed Ansi2WideMoveProc (DefaultSystemCodePage)
-   and copies at most arraysize characters of the converted text into the
-   returned array; the rest of the array is filled with zeroes.
- }
- var
-   len: SizeInt;
-   temp: widestring;
- begin
-   len := length(src);
-   { make sure we don't dereference src if it can be nil (JM) }
-   if len > 0 then
-     { the move procedure takes (source, codepage, dest, len) }
-     widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
-   len := length(temp);
-   if len > arraysize then
-     len := arraysize;
- {$push}
- {$r-}
-   move(temp[1],fpc_ansistr_to_widechararray[0],len*sizeof(widechar));
-   fillchar(fpc_ansistr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
- {$pop}
- end;
- function fpc_shortstr_to_widechararray(arraysize: SizeInt; const src: ShortString): fpc_big_widechararray;[public,alias: 'FPC_SHORTSTR_TO_WIDECHARARRAY']; compilerproc;
- {
-   Converts src with the installed Ansi2WideMoveProc (DefaultSystemCodePage)
-   and copies at most arraysize characters of the converted text into the
-   returned array; the rest of the array is filled with zeroes.
- }
- var
-   len: longint;
-   temp : widestring;
- begin
-   len := length(src);
-   { make sure we don't access char 1 if length is 0 (JM) }
-   if len > 0 then
-     { the move procedure takes (source, codepage, dest, len) }
-     widestringmanager.ansi2widemoveproc(pchar(@src[1]),DefaultSystemCodePage,temp,len);
-   len := length(temp);
-   if len > arraysize then
-     len := arraysize;
- {$push}
- {$r-}
-   move(temp[1],fpc_shortstr_to_widechararray[0],len*sizeof(widechar));
-   fillchar(fpc_shortstr_to_widechararray[len],(arraysize-len)*SizeOf(WideChar),0);
- {$pop}
- end;
- {$else ndef FPC_STRTOCHARARRAYPROC}
- procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc; // converts src via Wide2AnsiMoveProc and stores it in res, truncated to length(res) and zero-padded
- var
- len: SizeInt;
- temp: ansistring;
- begin
- len := length(src);
- { make sure we don't dereference src if it can be nil (JM) }
- if len > 0 then
- widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),temp,DefaultSystemCodePage,len);
- len := length(temp); // from here on len counts converted bytes, not source characters
- if len > length(res) then
- len := length(res);
- {$push}
- {$r-}
- move(temp[1],res[0],len);
- fillchar(res[len],length(res)-len,0); // zero the part of res that was not written
- {$pop}
- end;
- procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc; // copies at most length(res) characters of src into res and zero-fills the rest
- var
- len: SizeInt;
- begin
- len := length(src);
- if len > length(res) then
- len := length(res);
- {$push}
- {$r-}
- { make sure we don't try to access element 1 of the widestring if it's nil }
- if len > 0 then
- move(src[1],res[0],len*SizeOf(WideChar));
- fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
- {$pop}
- end;
- {$endif ndef FPC_STRTOCHARARRAYPROC}
- Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
- {
-   Three-way comparison of two WideStrings:
-   <0 if S1<S2
-   0 if S1=S2
-   >0 if S1>S2
-   The common prefix is compared word by word with CompareWord; when it
-   is equal the difference of the lengths decides.
- }
- Var
-   common,diff : SizeInt;
- begin
-   { identical data pointers: equal without looking at the contents }
-   if pointer(S1)=pointer(S2) then
-     exit(0);
-   common:=Length(S1);
-   if Length(S2)<common then
-     common:=Length(S2);
-   diff:=CompareWord(S1[1],S2[1],common);
-   if diff=0 then
-     diff:=Length(S1)-Length(S2);
-   fpc_WideStr_Compare:=diff;
- end;
- Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
- {
-   Equality-only comparison of two WideStrings:
-   0 if S1=S2
-   <>0 if S1<>S2
-   Strings of different length give -1 without comparing any contents.
- }
- Var
-   count : SizeInt;
- begin
-   if pointer(S1)=pointer(S2) then
-     fpc_WideStr_Compare_Equal:=0
-   else
-     begin
-       count:=Length(S1);
-       if count=Length(S2) then
-         fpc_WideStr_Compare_Equal:=CompareWord(S1[1],S2[1],count)
-       else
-         fpc_WideStr_Compare_Equal:=-1;
-     end;
- end;
- {$ifdef VER2_4}
- // obsolete but needed for bootstrapping with 2.4
- Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc; // reports run-time error 201 when p is nil
- begin
- if p=nil then
- HandleErrorFrame(201,get_frame);
- end;
- Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc; // reports run-time error 201 unless 1 <= index <= len div 2
- begin
- if (index>len div 2) or (Index<1) then // len is divided by 2 before the comparison, i.e. it is taken as a byte count
- HandleErrorFrame(201,get_frame);
- end;
- {$else VER2_4}
- Procedure fpc_WideStr_CheckRange(p: Pointer; index: SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc; // reports run-time error 201 when p is nil or index is outside 1..character count
- begin
- if (p=nil) or (index>PWideRec(p-WideFirstOff)^.len div 2) or (Index<1) then // the header stores bytes, so div 2 gives the character count
- HandleErrorFrame(201,get_frame);
- end;
- {$endif VER2_4}
- Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
- {
- Sets The length of string S to L characters.
- l<=0 frees S and sets it to nil.
- A nil S gets a new allocation. Otherwise the block is grown in place
- with reallocmem when needed, except when winwidestringalloc is set
- (MSWINDOWS only): then a new string is allocated, the old contents
- are copied and the old string is freed.
- In all l>0 cases a terminating #0 is written at position l.
- }
- Var
- Temp : Pointer;
- movelen: SizeInt;
- begin
- if (l>0) then
- begin
- if Pointer(S)=nil then
- begin
- { Need a complete new string...}
- Pointer(s):=NewWideString(l);
- end
- { windows doesn't support reallocing widestrings, this code
- is anyways subject to be removed because widestrings shouldn't be
- ref. counted anymore (FK) }
- else
- if
- {$ifdef MSWINDOWS}
- not winwidestringalloc and
- {$endif MSWINDOWS}
- True
- then
- begin
- Dec(Pointer(S),WideFirstOff); // step back to the start of the allocated block
- if SizeUInt(L*sizeof(WideChar)+WideRecLen)>MemSize(Pointer(S)) then // only reallocate when the current block is too small
- reallocmem(pointer(S), L*sizeof(WideChar)+WideRecLen);
- Inc(Pointer(S), WideFirstOff); // point at the first character again
- end
- else
- begin
- { Reallocation is needed... }
- Temp:=Pointer(NewWideString(L));
- if Length(S)>0 then
- begin
- if l < succ(length(s)) then
- movelen := l
- { also move terminating null }
- else
- movelen := succ(length(s));
- Move(Pointer(S)^,Temp^,movelen * Sizeof(WideChar));
- end;
- fpc_widestr_decr_ref(Pointer(S));
- Pointer(S):=Temp;
- end;
- { Force nil termination in case it gets shorter }
- PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
- {$ifdef MSWINDOWS}
- if not winwidestringalloc then
- {$endif MSWINDOWS}
- PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar); // header length is kept in bytes
- end
- else
- begin
- { Length=0 }
- if Pointer(S)<>nil then
- fpc_widestr_decr_ref (Pointer(S));
- Pointer(S):=Nil;
- end;
- end;
- {*****************************************************************************
- Public functions, In interface.
- *****************************************************************************}
- Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc; // returns S unchanged; no copy is made here
- begin
- pointer(result) := pointer(s);
- end;
- Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc; // returns up to Size characters of S starting at 1-based position Index; '' when nothing is left to copy
- var
- ResultAddress : Pointer;
- begin
- ResultAddress:=Nil;
- dec(index); // switch to a 0-based offset
- if Index < 0 then
- Index := 0;
- { Check Size. Accounts for Zero-length S, the double check is needed because
- Size can be maxint and will get <0 when adding index }
- if (Size>Length(S)) or
- (Index+Size>Length(S)) then
- Size:=Length(S)-Index;
- If Size>0 then
- begin
- If Index<0 Then // cannot trigger: Index was already clamped to 0 above
- Index:=0;
- ResultAddress:=Pointer(NewWideString (Size));
- if ResultAddress<>Nil then
- begin
- Move (PWideChar(S)[Index],ResultAddress^,Size*sizeof(WideChar));
- PWideRec(ResultAddress-WideFirstOff)^.Len:=Size*sizeof(WideChar);
- PWideChar(ResultAddress+Size*sizeof(WideChar))^:=#0; // terminate the copy
- end;
- end;
- fpc_widestr_decr_ref(Pointer(fpc_widestr_copy)); // release whatever the result variable held before
- Pointer(fpc_widestr_Copy):=ResultAddress;
- end;
- Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt; // 1-based index of the first occurrence of Substr in Source, 0 when absent or Substr is empty
- var
- i,MaxLen : SizeInt;
- pc : pwidechar;
- begin
- Pos:=0;
- if Length(SubStr)>0 then
- begin
- MaxLen:=Length(source)-Length(SubStr); // last 0-based start offset at which Substr still fits
- i:=0;
- pc:=@source[1];
- while (i<=MaxLen) do
- begin
- inc(i);
- if (SubStr[1]=pc^) and
- (CompareWord(Substr[1],pc^,Length(SubStr))=0) then // first-character test before the full comparison
- begin
- Pos:=i;
- exit;
- end;
- inc(pc);
- end;
- end;
- end;
- { Faster version for a widechar alone }
- Function Pos (c : WideChar; Const s : WideString) : SizeInt;
- { Returns the 1-based index of the first occurrence of c in s,
-   or 0 when c does not occur. }
- var
-   idx : SizeInt;
-   cur : pwidechar;
- begin
-   pos:=0;
-   cur:=@s[1];
-   for idx:=1 to length(s) do
-     if cur^=c then
-       begin
-         pos:=idx;
-         exit;
-       end
-     else
-       inc(cur);
- end;
- Function Pos (c : WideChar; Const s : RawByteString) : SizeInt; // converts s to a WideString and searches it for c
- begin
- result:=Pos(c,WideString(s));
- end;
- Function Pos (c : RawByteString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif} // converts c to a WideString and searches s for it
- begin
- result:=Pos(WideString(c),s);
- end;
- Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif} // converts c to a WideString and searches s for it
- begin
- result:=Pos(WideString(c),s);
- end;
- Function Pos (c : WideString; Const s : RawByteString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif} // converts s to a WideString and searches it for c
- begin
- result:=Pos(c,WideString(s));
- end;
- { Faster version for a char alone. Must be implemented because }
- { pos(c: char; const s: shortstring) also exists, so otherwise }
- { using pos(char,pchar) will always call the shortstring version }
- { (exact match for first argument), also with $h+ (JM) }
- Function Pos (c : Char; Const s : WideString) : SizeInt;
- { Returns the 1-based index of the first occurrence of c in s,
-   or 0 when c does not occur. c is widened once before the scan. }
- var
-   idx    : SizeInt;
-   needle : widechar;
-   cur    : pwidechar;
- begin
-   pos:=0;
-   needle:=c;
-   cur:=@s[1];
-   for idx:=1 to length(s) do
-     if cur^=needle then
-       begin
-         pos:=idx;
-         exit;
-       end
-     else
-       inc(cur);
- end;
- Procedure Delete (Var S : WideString; Index,Size: SizeInt); // removes Size characters from S starting at 1-based Index; out-of-range Index or Size<=0 leaves S untouched
- Var
- LS : SizeInt;
- begin
- LS:=Length(S);
- if (Index>LS) or (Index<=0) or (Size<=0) then
- exit;
- UniqueString (S);
- { (Size+Index) will overflow if Size=MaxInt. }
- if Size>LS-Index then
- Size:=LS-Index+1; // clamp to the number of characters from Index to the end
- if Size<=LS-Index then // something remains after the deleted range: move it down
- begin
- Dec(Index);
- Move(PWideChar(S)[Index+Size],PWideChar(S)[Index],(LS-Index-Size+1)*sizeof(WideChar)); // +1 also moves the terminating #0
- end;
- Setlength(s,LS-Size);
- end;
- Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt); // inserts Source into S before 1-based position Index; Index is clamped to 1..Length(S)+1
- var
- Temp : WideString;
- LS : SizeInt;
- begin
- If Length(Source)=0 then
- exit;
- if index <= 0 then
- index := 1;
- Ls:=Length(S);
- if index > LS then
- index := LS+1;
- Dec(Index); // 0-based from here on: number of characters of S that precede the insertion
- Pointer(Temp) := NewWideString(Length(Source)+LS);
- SetLength(Temp,Length(Source)+LS); // writes the terminating #0 and the header length
- If Index>0 then
- move (PWideChar(S)^,PWideChar(Temp)^,Index*sizeof(WideChar));
- Move (PWideChar(Source)^,PWideChar(Temp)[Index],Length(Source)*sizeof(WideChar));
- If (LS-Index)>0 then
- Move(PWideChar(S)[Index],PWideChar(temp)[Length(Source)+index],(LS-Index)*sizeof(WideChar)); // tail of S goes after the inserted text
- S:=Temp;
- end;
- function UpCase(const s : WideString) : WideString; // delegates to the installed widestringmanager.UpperWideStringProc
- begin
- result:=widestringmanager.UpperWideStringProc(s);
- end;
- Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt); // sets S to length Len and, when Buf is non-nil and Len>0, copies Len characters from Buf
- begin
- SetLength(S,Len);
- If (Buf<>Nil) and (Len>0) then
- Move (Buf[0],S[1],Len*sizeof(WideChar));
- end;
- Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt); // sets S to length Len and, when Buf is non-nil and Len>0, converts Len bytes from Buf via Ansi2WideMoveProc
- begin
- SetLength(S,Len);
- If (Buf<>Nil) and (Len>0) then
- widestringmanager.Ansi2WideMoveProc(Buf,DefaultSystemCodePage,S,Len);
- end;
- {$ifndef FPUNONE}
- Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
- { Parses S as a floating point value by converting it to a narrow
-   string and calling Val. Inputs longer than 255 characters are
-   rejected with Code=256 and a result of 0. }
- Var
-   narrow : String;
- begin
-   fpc_Val_Real_WideStr := 0;
-   if length(S) <= 255 then
-     begin
-       narrow := S;
-       Val(narrow,fpc_Val_Real_WideStr,code);
-     end
-   else
-     code := 256;
- end;
- {$endif}
- function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
- { Converts s to a shortstring and parses it with Val into the result.
-   Inputs longer than 255 characters are rejected with code=256 and a
-   result of 0, like the other Val helpers in this file.
-   NOTE(review): str2ordindex is not used by this routine - confirm
-   whether the enumeration name table was meant to be consulted here. }
- var ss:shortstring;
- begin
-   { give the result a defined value on the rejection path as well }
-   fpc_val_enum_widestr:=0;
-   if length(s)>255 then
-     code:=256
-   else
-     begin
-       ss:=s;
-       val(ss,fpc_val_enum_widestr,code);
-     end;
- end;
- Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
- { Parses S as a Currency value by converting it to a narrow string and
-   calling Val. Inputs longer than 255 characters are rejected with
-   Code=256 and a result of 0. }
- Var
-   narrow : String;
- begin
-   if length(S) <= 255 then
-     begin
-       narrow := S;
-       Val(narrow,fpc_Val_Currency_WideStr,code);
-     end
-   else
-     begin
-       fpc_Val_Currency_WideStr:=0;
-       code := 256;
-     end;
- end;
- Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
- { Parses S as an unsigned integer by converting it to a shortstring and
-   calling Val. Inputs longer than 255 characters are rejected with
-   Code=256 and a result of 0. }
- Var
-   narrow : ShortString;
- begin
-   fpc_Val_UInt_WideStr := 0;
-   if length(S) <= 255 then
-     begin
-       narrow := S;
-       Val(narrow,fpc_Val_UInt_WideStr,code);
-     end
-   else
-     code := 256;
- end;
- Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
- { Parses S as a signed integer by converting it to a shortstring and
-   handing it, together with DestSize, to int_Val_SInt_ShortStr.
-   Inputs longer than 255 characters are rejected with Code=256 and a
-   result of 0. }
- Var
-   narrow : ShortString;
- begin
-   fpc_Val_SInt_WideStr:=0;
-   if length(S)<=255 then
-     begin
-       narrow := S;
-       fpc_Val_SInt_WideStr := int_Val_SInt_ShortStr(DestSize,narrow,Code);
-     end
-   else
-     code:=256;
- end;
- {$ifndef CPU64}
- Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
- { Parses S as a qword by converting it to a shortstring and calling Val.
-   Inputs longer than 255 characters are rejected with Code=256 and a
-   result of 0. }
- Var
-   narrow : ShortString;
- begin
-   fpc_Val_qword_WideStr:=0;
-   if length(S)<=255 then
-     begin
-       narrow := S;
-       Val(narrow,fpc_Val_qword_WideStr,Code);
-     end
-   else
-     code:=256;
- end;
- { Val() helper: parses an Int64 out of a WideString. }
- { The text goes through a ShortString copy; inputs longer than 255 }
- { characters are rejected with Code=256 and a result of 0. }
- Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
- Var
-   Narrow : ShortString;
- begin
-   Result:=0;
-   if length(S)>255 then
-     begin
-       code:=256;
-       exit;
-     end;
-   Narrow := S;
-   Val(Narrow,Result,Code);
- end;
- {$endif CPU64}
- {$ifndef FPUNONE}
- { Str() helper: formats the real value d into a WideString. }
- { str_real produces the text in a ShortString (rt is cast to treal_type), }
- { which is then assigned to s. }
- procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
- var
-   formatted : shortstring;
- begin
-   str_real(len,fr,d,treal_type(rt),formatted);
-   s:=formatted;
- end;
- {$endif}
- { Str() helper for enumeration values: delegates to fpc_shortstr_enum and }
- { assigns the resulting ShortString to the WideString s. }
- procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
- var
-   formatted : shortstring;
- begin
-   fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,formatted);
-   s:=formatted;
- end;
- { Str() helper for booleans: delegates to fpc_shortstr_bool and assigns }
- { the resulting ShortString to the WideString s. }
- procedure fpc_widestr_bool(b : boolean;len:sizeint;out s:widestring);compilerproc;
- var
-   formatted : shortstring;
- begin
-   fpc_shortstr_bool(b,len,formatted);
-   s:=formatted;
- end;
- {$ifdef FPC_HAS_STR_CURRENCY}
- { Str() helper for Currency: formats c with width len and fr decimals into }
- { a ShortString and assigns it to the WideString s. }
- procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
- var
-   formatted : shortstring;
- begin
-   str(c:len:fr,formatted);
-   s:=formatted;
- end;
- {$endif FPC_HAS_STR_CURRENCY}
- { Str() helper for signed integers: formats v with field width Len into a }
- { ShortString and assigns it to the WideString S. }
- Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
- Var
-   Formatted : ShortString;
- begin
-   Str(v:Len,Formatted);
-   S:=Formatted;
- end;
- { Str() helper for unsigned integers: formats v with field width Len into }
- { a ShortString and assigns it to the WideString S. }
- Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
- Var
-   Formatted : ShortString;
- begin
-   Str(v:Len,Formatted);
-   S:=Formatted;
- end;
- {$ifndef CPU64}
- { Str() helper for Int64: formats v with field width Len into a }
- { ShortString and assigns it to the WideString S. }
- Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
- Var
-   Formatted : ShortString;
- begin
-   Str(v:Len,Formatted);
-   S:=Formatted;
- end;
- { Str() helper for QWord: formats v with field width Len into a }
- { ShortString and assigns it to the WideString S. }
- Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
- Var
-   Formatted : ShortString;
- begin
-   Str(v:Len,Formatted);
-   S:=Formatted;
- end;
- {$endif CPU64}
- { Decodes the UTF-16 code unit found at S[index] into a UTF-32 value. }
- { len receives the number of code units consumed: 2 when S[index] is a }
- { high surrogate immediately followed by a low surrogate, 1 otherwise. }
- { An unpaired surrogate is returned unchanged. index itself is not }
- { range-checked here. }
- function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
- var
-   lead, trail: widechar;
- begin
-   lead:=s[index];
-   { default outcome: code units in #$0-#$D7FF and #$E000-#$FFFF are the }
-   { same in UTF-32, and an invalid surrogate is passed through as is }
-   result:=UCS4Char(lead);
-   len:=1;
-   { only a high surrogate that has a successor can start a pair }
-   if (lead>=#$d800) and (lead<=#$dbff) and (index<length(s)) then
-     begin
-       trail:=s[index+1];
-       if (trail>=#$dc00) and (trail<=#$dfff) then
-         begin
-           { combine the surrogate pair into one UTF-32 value }
-           result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
-           len:=2;
-         end;
-     end;
- end;
- { Convenience overload for zero-terminated input: measures Source with }
- { IndexWord (search for a 0 word) and forwards to the four-argument }
- { UnicodeToUtf8. Returns 0 when Source is nil. }
- function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- begin
-   Result:=0;
-   if assigned(Source) then
-     Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
- end;
- { Converts SourceChars UTF-16 code units starting at Source into UTF-8. }
- { }
- { Dest<>nil: at most MaxDestBytes bytes are stored in Dest, including a }
- {   terminating #0. A multi-byte sequence that does not fit stops the }
- {   conversion. If the output fills the buffer completely, the #0 }
- {   overwrites the byte at index MaxDestBytes-1. }
- { Dest=nil:  nothing is stored; only the required size is computed. }
- { }
- { A high surrogate that is not followed by a low surrogate, and a low }
- { surrogate on its own, produce no output. }
- { }
- { Returns 0 when Source is nil, otherwise the number of UTF-8 bytes }
- { produced (or needed) plus one for the terminator. }
- function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
- var
-   i,j : SizeUInt;
-   w : word;
-   lw : longword;
- begin
-   result:=0;
-   if source=nil then
-     exit;
-   i:=0;
-   j:=0;
-   if assigned(Dest) then
-     begin
-       while (i<SourceChars) and (j<MaxDestBytes) do
-         begin
-           w:=word(Source[i]);
-           case w of
-             0..$7f:
-               begin
-                 Dest[j]:=char(w);
-                 inc(j);
-               end;
-             $80..$7ff:
-               begin
-                 if j+1>=MaxDestBytes then
-                   break;
-                 Dest[j]:=char($c0 or (w shr 6));
-                 Dest[j+1]:=char($80 or (w and $3f));
-                 inc(j,2);
-               end;
-             $800..$d7ff,$e000..$ffff:
-               begin
-                 if j+2>=MaxDestBytes then
-                   break;
-                 Dest[j]:=char($e0 or (w shr 12));
-                 Dest[j+1]:=char($80 or ((w shr 6) and $3f));
-                 Dest[j+2]:=char($80 or (w and $3f));
-                 inc(j,3);
-               end;
-             $d800..$dbff:
-               {High Surrogates}
-               begin
-                 if j+3>=MaxDestBytes then
-                   break;
-                 if (i<sourcechars-1) and
-                    (word(Source[i+1]) >= $dc00) and
-                    (word(Source[i+1]) <= $dfff) then
-                   begin
-                     { combine the pair directly; this avoids building a }
-                     { temporary WideString just to decode two code units }
-                     lw:=(longword(w)-$d800) shl 10 + (longword(word(Source[i+1]))-$dc00) + $10000;
-                     Dest[j]:=char($f0 or (lw shr 18));
-                     Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
-                     Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
-                     Dest[j+3]:=char($80 or (lw and $3f));
-                     inc(j,4);
-                     { the low surrogate has been consumed as well }
-                     inc(i);
-                   end;
-               end;
-           end;
-           inc(i);
-         end;
-       { Terminate the output. With MaxDestBytes=0 there is no room for }
-       { even the terminator, and MaxDestBytes-1 would wrap around, so }
-       { nothing may be written in that case. }
-       if MaxDestBytes>0 then
-         begin
-           if j>MaxDestBytes-1 then
-             j:=MaxDestBytes-1;
-           Dest[j]:=#0;
-         end;
-     end
-   else
-     begin
-       { size calculation only }
-       while i<SourceChars do
-         begin
-           case word(Source[i]) of
-             $0..$7f:
-               inc(j);
-             $80..$7ff:
-               inc(j,2);
-             $800..$d7ff,$e000..$ffff:
-               inc(j,3);
-             $d800..$dbff:
-               begin
-                 if (i<sourcechars-1) and
-                    (word(Source[i+1]) >= $dc00) and
-                    (word(Source[i+1]) <= $dfff) then
-                   begin
-                     inc(j,4);
-                     inc(i);
-                   end;
-               end;
-           end;
-           inc(i);
-         end;
-     end;
-   result:=j+1;
- end;
- { Convenience overload for zero-terminated input: measures Source with }
- { strlen and forwards to the four-argument Utf8ToUnicode. }
- { Returns 0 when Source is nil. }
- function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- begin
-   Result:=0;
-   if assigned(Source) then
-     Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
- end;
- { Converts SourceBytes bytes of UTF-8 starting at Source into UTF-16. }
- { }
- { Dest<>nil: at most MaxDestChars code units are stored in Dest. No }
- {   terminator is written. A 4-byte sequence becomes a surrogate pair; }
- {   if only one code unit of room is left it is skipped entirely. }
- { Dest=nil:  nothing is stored; only the required size is computed. }
- { }
- { Malformed input (stray continuation byte, truncated or overlong }
- { sequence, encoded surrogate, $FFFE/$FFFF, lead byte announcing more }
- { than 4 bytes) is replaced by UNICODE_INVALID ('?'). }
- { }
- { Returns 0 when Source is nil, otherwise the number of code units }
- { produced (or needed) plus one. }
- function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
- const
-   UNICODE_INVALID=63;
- var
-   InputUTF8: SizeUInt;
-   IBYTE: BYTE;
-   OutputUnicode: SizeUInt;
-   TempBYTE: BYTE;
-   CharLen: SizeUint;
-   LookAhead: SizeUInt;
-   UC: SizeUInt;
- begin
-   if not assigned(Source) then
-     begin
-       result:=0;
-       exit;
-     end;
-   InputUTF8:=0;
-   OutputUnicode:=0;
-   if Assigned(Dest) Then
-     begin
-       while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
-         begin
-           IBYTE:=byte(Source[InputUTF8]);
-           if (IBYTE and $80) = 0 then
-             begin
-               //One character US-ASCII, copied unchanged (no CR/LF expansion)
-               Dest[OutputUnicode]:=WideChar(IBYTE);
-               inc(OutputUnicode);
-               inc(InputUTF8);
-             end
-           else
-             begin
-               //The number of leading 1 bits is the announced sequence length
-               TempByte:=IBYTE;
-               CharLen:=0;
-               while (TempBYTE and $80)<>0 do
-                 begin
-                   TempBYTE:=(TempBYTE shl 1) and $FE;
-                   inc(CharLen);
-                 end;
-               //The sequence occupies InputUTF8..InputUTF8+CharLen-1, and all
-               //of these indexes must be below SourceBytes.
-               if InputUTF8+CharLen>SourceBytes then
-                 begin
-                   //Insufficient bytes left to decode the sequence.
-                   //Fallback to single char.
-                   CharLen:= 1;
-                 end;
-               //Every following byte must match the 10xxxxxx pattern.
-               for LookAhead := 1 to CharLen-1 do
-                 begin
-                   if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
-                      ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
-                     begin
-                       //Invalid UTF-8 sequence, fallback.
-                       CharLen:= LookAhead;
-                       break;
-                     end;
-                 end;
-               UC:=$FFFF;
-               case CharLen of
-                 1:  begin
-                       //Not valid UTF-8 sequence
-                       UC:=UNICODE_INVALID;
-                     end;
-                 2:  begin
-                       //Two bytes UTF, convert it
-                       UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
-                       UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
-                       if UC <= $7F then
-                         begin
-                           //Overlong encoding, invalid UTF sequence.
-                           UC:=UNICODE_INVALID;
-                         end;
-                     end;
-                 3:  begin
-                       //Three bytes, convert it to unicode
-                       UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
-                       UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
-                       UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
-                       if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
-                         begin
-                           //Invalid UTF-8 sequence
-                           UC:= UNICODE_INVALID;
-                         end;
-                     end;
-                 4:  begin
-                       //Four bytes, convert it to two unicode characters
-                       UC:= (byte(Source[InputUTF8]) and $07) shl 18;
-                       UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
-                       UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
-                       UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
-                       if (UC < $10000) or (UC > $10FFFF) then
-                         begin
-                           UC:= UNICODE_INVALID;
-                         end
-                       else
-                         begin
-                           { only store pair if room }
-                           dec(UC,$10000);
-                           if (OutputUnicode<MaxDestChars-1) then
-                             begin
-                               Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
-                               inc(OutputUnicode);
-                               UC:=(UC and $3ff) + $DC00;
-                             end
-                           else
-                             begin
-                               InputUTF8:= InputUTF8 + CharLen;
-                               { don't store anything }
-                               CharLen:=0;
-                             end;
-                         end;
-                     end;
-                 5..8: begin
-                       //Lead bytes announcing 5 or more bytes (8 = byte $FF)
-                       //are not valid UTF-8, mask it as invalid UNICODE too.
-                       UC:=UNICODE_INVALID;
-                     end;
-               end;
-               if CharLen > 0 then
-                 begin
-                   Dest[OutputUnicode]:=WideChar(UC);
-                   inc(OutputUnicode);
-                 end;
-               InputUTF8:= InputUTF8 + CharLen;
-             end;
-         end;
-       Result:=OutputUnicode+1;
-     end
-   else
-     begin
-       { size calculation only: same scanning rules as above }
-       while (InputUTF8<SourceBytes) do
-         begin
-           IBYTE:=byte(Source[InputUTF8]);
-           if (IBYTE and $80) = 0 then
-             begin
-               //One character US-ASCII
-               inc(OutputUnicode);
-               inc(InputUTF8);
-             end
-           else
-             begin
-               TempByte:=IBYTE;
-               CharLen:=0;
-               while (TempBYTE and $80)<>0 do
-                 begin
-                   TempBYTE:=(TempBYTE shl 1) and $FE;
-                   inc(CharLen);
-                 end;
-               //All bytes of the sequence must lie inside the input.
-               if InputUTF8+CharLen>SourceBytes then
-                 begin
-                   //Insufficient bytes left to decode the sequence.
-                   //Fallback to single char.
-                   CharLen:= 1;
-                 end;
-               for LookAhead := 1 to CharLen-1 do
-                 begin
-                   if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
-                      ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
-                     begin
-                       //Invalid UTF-8 sequence, fallback.
-                       CharLen:= LookAhead;
-                       break;
-                     end;
-                 end;
-               //Every sequence counts as one code unit (valid or replaced by
-               //UNICODE_INVALID); only a valid 4-byte sequence needs a second
-               //one for the surrogate pair.
-               if CharLen = 4 then
-                 begin
-                   UC:= (byte(Source[InputUTF8]) and $07) shl 18;
-                   UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
-                   UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
-                   UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
-                   if (UC >= $10000) and (UC <= $10FFFF) then
-                     inc(OutputUnicode);
-                 end;
-               inc(OutputUnicode);
-               InputUTF8:= InputUTF8 + CharLen;
-             end;
-         end;
-       Result:=OutputUnicode+1;
-     end;
- end;
- { Returns the UTF-8 encoding of s; an empty input gives an empty result. }
- { The work buffer is sized at three bytes per UTF-16 code unit and then }
- { trimmed to the length reported by UnicodeToUtf8, less the terminator }
- { that the reported length includes. }
- function UTF8Encode(const s : WideString) : RawByteString;
- var
-   written : SizeInt;
-   buf : UTF8String;
- begin
-   result:='';
-   if s<>'' then
-     begin
-       SetLength(buf,length(s)*3);
-       written:=UnicodeToUtf8(pchar(buf),length(buf)+1,PWideChar(s),length(s));
-       if written>0 then
-         begin
-           SetLength(buf,written-1);
-           result:=buf;
-         end;
-     end;
- end;
- const
- SNoWidestrings = 'This binary has no widestrings support compiled in.';
- SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';
- { Reports that no widestring support is available. }
- { When console I/O is compiled in and IsConsole is set, the two messages }
- { SNoWidestrings and SRecompileWithWidestrings are written to StdErr. }
- { In all cases HandleErrorFrame is then called with error code 233 and }
- { the current frame. }
- procedure unimplementedwidestring;
- begin
- {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
- If IsConsole then
- begin
- Writeln(StdErr,SNoWidestrings);
- Writeln(StdErr,SRecompileWithWidestrings);
- end;
- {$endif FPC_HAS_FEATURE_CONSOLEIO}
- HandleErrorFrame(233,get_frame);
- end;
- {$warnings off}
- { Placeholder case-conversion routine: it only calls }
- { unimplementedwidestring and never assigns a function result. }
- { initwidestringmanager installs it for both the upper-case and the }
- { lower-case hook. }
- function GenericWideCase(const s : WideString) : WideString;
- begin
- unimplementedwidestring;
- end;
- { Placeholder comparison routine: it only calls unimplementedwidestring }
- { and never assigns a function result. }
- function CompareWideString(const s1, s2 : WideString) : PtrInt;
- begin
- unimplementedwidestring;
- end;
- { Placeholder text comparison routine: it only calls }
- { unimplementedwidestring and never assigns a function result. }
- function CompareTextWideString(const s1, s2 : WideString): PtrInt;
- begin
- unimplementedwidestring;
- end;
- {$warnings on}
- function DefaultCharLengthPChar(const Str: PChar): PtrInt;forward;
- function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;forward;
- { Resets the global widestringmanager record and installs the default }
- { handlers defined in this file. }
- procedure initwidestringmanager;
- begin
- { start from an all-zero record so that every hook not set below is nil }
- fillchar(widestringmanager,sizeof(widestringmanager),0);
- { the move and case-conversion defaults are installed only when }
- { HAS_WIDESTRINGMANAGER is not defined }
- {$ifndef HAS_WIDESTRINGMANAGER}
- widestringmanager.Wide2AnsiMoveProc:=@DefaultWide2AnsiMove;
- widestringmanager.Ansi2WideMoveProc:=@DefaultAnsi2WideMove;
- widestringmanager.UpperWideStringProc:=@GenericWideCase;
- widestringmanager.LowerWideStringProc:=@GenericWideCase;
- {$endif HAS_WIDESTRINGMANAGER}
- { the comparison hooks point at the placeholder routines that call }
- { unimplementedwidestring }
- widestringmanager.CompareWideStringProc:=@CompareWideString;
- widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
- widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar;
- widestringmanager.CodePointLengthProc:=@DefaultCodePointLength;
- end;
|