12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 1999-2005 by Florian Klaempfl,
- member of the Free Pascal development team.
- This file implements support routines for UTF-8 strings with FPC
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$i wustrings.inc}
{
  This file contains the implementation of the UnicodeString type,
  and all things that are needed for it.
  UnicodeString is defined as a 'silent' punicodechar :
  a punicodechar that points to :

  @-2*SizeOf(SizeInt) : SizeInt for reference count (@-8 on 32-bit targets);
  @-SizeOf(SizeInt)   : SizeInt for size (@-4 on 32-bit targets); size=number of bytes,
                        not the number of chars. Divide or multiply with sizeof(UnicodeChar)
                        to convert. This is needed to be compatible with Delphi and
                        Windows COM BSTR.
  @                   : String + Terminating #0;
  Punicodechar(Unicodestring) is a valid typecast.
  So WS[i] is converted to the address @WS+(i-1)*sizeof(UnicodeChar).

  Constants should be assigned a reference count of -1,
  meaning that they can't be disposed of.
}
{ Header record that sits directly in front of the character data of a
  UnicodeString; a UnicodeString pointer points at the First field. }
Type
  PUnicodeRec = ^TUnicodeRec;
  TUnicodeRec = Packed Record
    Ref   : SizeInt;      { reference count; a negative value marks a constant }
    Len   : SizeInt;      { payload size in bytes, not in characters }
    First : UnicodeChar;  { first character of the payload }
  end;

Const
  { Size of the whole header record, First included. }
  UnicodeRecLen   = SizeOf(TUnicodeRec);
  { Distance in bytes from the start of the header to the First field. }
  UnicodeFirstOff = UnicodeRecLen-SizeOf(UnicodeChar);
{
  Default UnicodeChar <-> Char conversion: UnicodeChar -> Char only converts
  code units below 256, all others are translated to '?'. Char -> UnicodeChar
  maps every byte to the code unit with the same value.
  These routines can be overridden for the current locale.
}
procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:ansistring;len:SizeInt);
{
  Fallback UnicodeChar -> Char conversion.
  dest is resized to len characters; every code unit below 256 is copied as
  a single byte, every other code unit is replaced by '?'.
}
var
  idx  : SizeInt;
  code : word;
begin
  setlength(dest,len);
  idx:=1;
  while idx<=len do
    begin
      code:=word(source^);
      if code>=256 then
        dest[idx]:='?'
      else
        dest[idx]:=char(code);
      inc(source);
      inc(idx);
    end;
end;
procedure DefaultAnsi2UnicodeMove(source:pchar;var dest:unicodestring;len:SizeInt);
{
  Fallback Char -> UnicodeChar conversion.
  dest is resized to len characters and every source byte is widened to the
  code unit with the same numeric value.
}
var
  idx : SizeInt;
begin
  setlength(dest,len);
  idx:=1;
  while idx<=len do
    begin
      dest[idx]:=unicodechar(byte(source^));
      inc(source);
      inc(idx);
    end;
end;
Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
{ Copies the currently installed string manager record into Manager. }
begin
  Manager:=widestringmanager;
end;
Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
{ Installs New as the active string manager and hands the previously
  installed record back in Old. }
begin
  { save first: the global is overwritten right after }
  Old:=widestringmanager;
  widestringmanager:=New;
end;
Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
{ Installs New as the active string manager; the previous record is dropped. }
begin
  widestringmanager:=New;
end;
Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
{ Copies the currently installed string manager record into Manager.
  Reads the same global record as GetUnicodeStringManager. }
begin
  Manager:=widestringmanager;
end;
Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
{ Installs New as the active string manager and hands the previously
  installed record back in Old. }
begin
  { save first: the global is overwritten right after }
  Old:=widestringmanager;
  widestringmanager:=New;
end;
Procedure SetWideStringManager (Const New : TUnicodeStringManager);
{ Installs New as the active string manager; the previous record is dropped. }
begin
  widestringmanager:=New;
end;
- {****************************************************************************
- Internal functions, not in interface.
- ****************************************************************************}
procedure UnicodeStringError;
{ Raises run-time error 204, reported against the current stack frame. }
begin
  HandleErrorFrame(204,get_frame);
end;
- {$ifdef UnicodeStrDebug}
Procedure DumpUnicodeRec(S : Pointer);
{ Debug helper: writes the Len and Ref header fields of the string whose
  character data S points at, or a notice when S is nil. }
var
  hdr : PUnicodeRec;
begin
  if S=nil then
    begin
      Writeln ('String is nil');
      exit;
    end;
  { the header lives UnicodeFirstOff bytes in front of the characters }
  hdr:=PUnicodeRec(S-UnicodeFirstOff);
  Write ('(Len:',hdr^.len);
  Writeln (' Ref: ',hdr^.ref,')');
end;
- {$endif}
Function NewUnicodeString(Len : SizeInt) : Pointer;
{
  Allocates a new UnicodeString with room for Len characters on the heap.
  The header is initialised with a byte length of Len*sizeof(UnicodeChar)
  and a reference count of 1; the first character is set to #0.

  Returns a pointer to the first character (not to the header).
  If GetMem hands back nil, UnicodeStringError is called and nil is returned
  should that call return.
}
Var
  P : Pointer;
begin
  GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
  If P<>Nil then
    begin
      { Len is stored in bytes; use the same element size as the allocation
        above instead of a hard-coded 2 }
      PUnicodeRec(P)^.Len:=Len*sizeof(UnicodeChar); { Initial length }
      PUnicodeRec(P)^.Ref:=1;                       { Initial Refcount }
      PUnicodeRec(P)^.First:=#0;                    { Terminating #0 }
      inc(p,UnicodeFirstOff);                       { Points to string now }
    end
  else
    UnicodeStringError;
  NewUnicodeString:=P;
end;
Procedure DisposeUnicodeString(Var S : Pointer);
{
  Releases the heap block of a UnicodeString and sets S to nil.
  S points at the first character; the block itself starts UnicodeFirstOff
  bytes earlier. A nil S is ignored.
}
begin
  if S<>nil then
    begin
      Freemem(S-UnicodeFirstOff);
      S:=nil;
    end;
end;
Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_UNICODESTR_DECR_REF']; compilerproc;
{
  Drops one reference from the string S points at.
  Nil strings and strings with a negative reference count (constants) are
  left untouched. When the count reaches zero the string is disposed, which
  also sets S to nil.
}
Type
  pSizeInt = ^SizeInt;
Var
  refcnt : pSizeInt;
Begin
  if S<>nil then
    begin
      refcnt:=@PUnicodeRec(S-UnicodeFirstOff)^.Ref;
      { negative count = constant string, never freed }
      if refcnt^>=0 then
        { declocked does a MT safe dec and returns true, if the counter is 0 }
        if declocked(refcnt^) then
          DisposeUnicodeString(S);
    end;
end;

{ alias for internal use }
Procedure fpc_UnicodeStr_Decr_Ref (Var S : Pointer);[external name 'FPC_UNICODESTR_DECR_REF'];
Procedure fpc_UnicodeStr_Incr_Ref(S : Pointer);[Public,Alias:'FPC_UNICODESTR_INCR_REF']; compilerproc;
{
  Adds one reference to the string S points at.
  Nil strings and strings with a negative reference count (constants) are
  left untouched.
}
var
  hdr : PUnicodeRec;
Begin
  if S=nil then
    exit;
  hdr:=PUnicodeRec(S-UnicodeFirstOff);
  { constant strings keep their negative count }
  if hdr^.Ref>=0 then
    inclocked(hdr^.Ref);
end;

{ alias for internal use }
Procedure fpc_UnicodeStr_Incr_Ref (S : Pointer);[external name 'FPC_UNICODESTR_INCR_REF'];
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_UnicodeStr_To_ShortStr (high_of_res: SizeInt;const S2 : UnicodeString): shortstring;[Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR']; compilerproc;
{
  Converts a UnicodeString to a ShortString.
  At most high_of_res characters of S2 are handed to the installed
  Unicode->Ansi conversion routine; an empty S2 yields ''.
}
Var
  Size : SizeInt;
  temp : ansistring;
begin
  result:='';
  Size:=Length(S2);
  if Size<=0 then
    exit;
  { clamp to the capacity of the destination }
  if Size>high_of_res then
    Size:=high_of_res;
  widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,Size);
  result:=temp;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
{
  Converts a UnicodeString to a ShortString.
  At most high(res) characters of S2 are handed to the installed
  Unicode->Ansi conversion routine; an empty S2 yields ''.
}
Var
  Size : SizeInt;
  temp : ansistring;
begin
  res:='';
  Size:=Length(S2);
  if Size<=0 then
    exit;
  { clamp to the capacity of the destination }
  if Size>high(res) then
    Size:=high(res);
  widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,Size);
  res:=temp;
end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
{
  Converts a ShortString to a UnicodeString using the installed
  Ansi->Unicode conversion routine. An empty S2 yields ''.

  No terminating #0 is written by hand here: the conversion routine sizes
  the result itself, and the number of characters it produces need not equal
  the number of source bytes. Writing at offset Size*sizeof(UnicodeChar), as
  was done before, could land outside the allocated string or on a nil
  pointer when the routine returns a shorter or empty result.
  NOTE(review): this relies on the conversion routine leaving result properly
  terminated, as DefaultAnsi2UnicodeMove does by sizing it with SetLength.
}
Var
  Size : SizeInt;
begin
  result:='';
  Size:=Length(S2);
  if Size>0 then
    widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),result,Size);
end;
Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString): AnsiString; compilerproc;
{
  Converts a UnicodeString to an AnsiString through the installed
  Unicode->Ansi conversion routine. An empty S2 yields ''.
}
Var
  CharCount : SizeInt;
begin
  result:='';
  CharCount:=Length(S2);
  if CharCount<=0 then
    exit;
  widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,CharCount);
end;
Function fpc_AnsiStr_To_UnicodeStr (Const S2 : AnsiString): UnicodeString; compilerproc;
{
  Converts an AnsiString to a UnicodeString through the installed
  Ansi->Unicode conversion routine. An empty S2 yields ''.
}
Var
  ByteCount : SizeInt;
begin
  result:='';
  ByteCount:=Length(S2);
  if ByteCount<=0 then
    exit;
  widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),result,ByteCount);
end;
Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
{
  Converts a UnicodeString to a WideString by sizing the result to the same
  character count and copying the raw character data.
}
var
  CharCount : SizeInt;
begin
  CharCount:=Length(S2);
  SetLength(Result,CharCount);
  Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
end;
Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
{
  Converts a WideString to a UnicodeString by sizing the result to the same
  character count and copying the raw character data.
}
var
  CharCount : SizeInt;
begin
  CharCount:=Length(S2);
  SetLength(Result,CharCount);
  Move(pointer(S2)^,Pointer(Result)^,CharCount*sizeof(WideChar));
end;
Function fpc_PUnicodeChar_To_AnsiStr(const p : punicodechar): ansistring; compilerproc;
{
  Converts a #0-terminated UnicodeChar buffer to an AnsiString through the
  installed Unicode->Ansi conversion routine. A nil p or an empty buffer
  yields ''.
}
var
  CharCount : SizeInt;
begin
  result:='';
  if p<>nil then
    begin
      { position of the terminating #0 = number of characters }
      CharCount:=IndexWord(p^, -1, 0);
      if CharCount>0 then
        widestringmanager.Unicode2AnsiMoveProc(P,result,CharCount);
    end;
end;
Function fpc_PUnicodeChar_To_UnicodeStr(const p : punicodechar): unicodestring; compilerproc;
{
  Copies a #0-terminated UnicodeChar buffer into a new UnicodeString.
  A nil p yields ''.
}
var
  CharCount : SizeInt;
begin
  result:='';
  if p<>nil then
    begin
      { position of the terminating #0 = number of characters }
      CharCount:=IndexWord(p^, -1, 0);
      Setlength(result,CharCount);
      if CharCount>0 then
        begin
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
          { Terminating Zero }
          PUnicodeChar(Pointer(result)+CharCount*sizeof(UnicodeChar))^:=#0;
        end;
    end;
end;
Function fpc_PWideChar_To_UnicodeStr(const p : pwidechar): unicodestring; compilerproc;
{
  Copies a #0-terminated WideChar buffer into a new UnicodeString.
  A nil p yields ''.
}
var
  CharCount : SizeInt;
begin
  result:='';
  if p<>nil then
    begin
      { position of the terminating #0 = number of characters }
      CharCount:=IndexWord(p^, -1, 0);
      Setlength(result,CharCount);
      if CharCount>0 then
        begin
          Move(p^,PUnicodeChar(Pointer(result))^,CharCount*sizeof(UnicodeChar));
          { Terminating Zero }
          PUnicodeChar(Pointer(result)+CharCount*sizeof(UnicodeChar))^:=#0;
        end;
    end;
end;
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
Function fpc_PUnicodeChar_To_ShortStr(const p : punicodechar): shortstring; compilerproc;
{
  Converts a #0-terminated UnicodeChar buffer to a ShortString through the
  installed Unicode->Ansi conversion routine. A nil p or an empty buffer
  yields ''.
}
var
  CharCount : SizeInt;
  temp: ansistring;
begin
  result:='';
  if p<>nil then
    begin
      { the search for the terminator is capped at $7fffffff words }
      CharCount:=IndexWord(p^, $7fffffff, 0);
      if CharCount>0 then
        begin
          widestringmanager.Unicode2AnsiMoveProc(p,temp,CharCount);
          result:=temp;
        end;
    end;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_PUnicodeChar_To_ShortStr(out res : shortstring;const p : punicodechar); compilerproc;
{
  Converts a #0-terminated UnicodeChar buffer to a ShortString through the
  installed Unicode->Ansi conversion routine. A nil p or an empty buffer
  yields ''.
}
var
  CharCount : SizeInt;
  temp: ansistring;
begin
  res:='';
  if p<>nil then
    begin
      { the search for the terminator is capped at high(PtrInt) words }
      CharCount:=IndexWord(p^, high(PtrInt), 0);
      if CharCount>0 then
        begin
          widestringmanager.Unicode2AnsiMoveProc(p,temp,CharCount);
          res:=temp;
        end;
    end;
end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_PWideChar_To_AnsiStr(const p : pwidechar): ansistring; compilerproc;
{
  Converts a #0-terminated WideChar buffer to an AnsiString through the
  installed Wide->Ansi conversion routine. A nil p or an empty buffer
  yields ''.
}
var
  CharCount : SizeInt;
begin
  result:='';
  if p<>nil then
    begin
      { position of the terminating #0 = number of characters }
      CharCount:=IndexWord(p^, -1, 0);
      if CharCount>0 then
        widestringmanager.Wide2AnsiMoveProc(P,result,CharCount);
    end;
end;
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
Function fpc_PWideChar_To_ShortStr(const p : pwidechar): shortstring; compilerproc;
{
  Converts a #0-terminated WideChar buffer to a ShortString through the
  installed Wide->Ansi conversion routine. A nil p or an empty buffer
  yields ''.
}
var
  CharCount : SizeInt;
  temp: ansistring;
begin
  result:='';
  if p<>nil then
    begin
      { the search for the terminator is capped at $7fffffff words }
      CharCount:=IndexWord(p^, $7fffffff, 0);
      if CharCount>0 then
        begin
          widestringmanager.Wide2AnsiMoveProc(p,temp,CharCount);
          result:=temp;
        end;
    end;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_PWideChar_To_ShortStr(out res : shortstring;const p : pwidechar); compilerproc;
{
  Converts a #0-terminated WideChar buffer to a ShortString through the
  installed Wide->Ansi conversion routine. A nil p or an empty buffer
  yields ''.
}
var
  CharCount : SizeInt;
  temp: ansistring;
begin
  res:='';
  if p<>nil then
    begin
      { the search for the terminator is capped at high(PtrInt) words }
      CharCount:=IndexWord(p^, high(PtrInt), 0);
      if CharCount>0 then
        begin
          widestringmanager.Wide2AnsiMoveProc(p,temp,CharCount);
          res:=temp;
        end;
    end;
end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
- { checked against the ansistring routine, 2001-05-27 (FK) }
Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_UNICODESTR_ASSIGN']; compilerproc;
{
  Performs S1:=S2 with reference counting: S2 gains a reference (unless it
  is nil or its count is not positive), the string S1 referred to so far
  loses one, and S1 is then pointed at S2.
}
var
  src : PUnicodeRec;
begin
  { take the new reference first, so S1:=S1 cannot free the string }
  if S2<>nil then
    begin
      src:=PUnicodeRec(S2-UnicodeFirstOff);
      if src^.Ref>0 then
        inclocked(src^.ref);
    end;
  { Decrease the reference count on the old S1 }
  fpc_unicodestr_decr_ref (S1);
  s1:=s2;
end;

{ alias for internal use }
Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_UNICODESTR_ASSIGN'];
- {$ifndef STR_CONCAT_PROCS}
function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
{
  Returns S1+S2. When one operand is empty the other one is simply assigned;
  otherwise the result is sized once and both operands are copied in.
}
Var
  LenS1,LenS2 : SizeInt;
  dst : punicodechar;
begin
  if (S1='') then
    result:=s2
  else if (S2='') then
    result:=s1
  else
    begin
      LenS1:=Length(S1);
      LenS2:=length(S2);
      SetLength(result,LenS1+LenS2);
      dst:=punicodechar(result);
      Move(S1[1],dst^,LenS1*sizeof(UnicodeChar));
      inc(dst,LenS1);
      { +1 also copies the terminating #0 of S2 }
      Move(S2[1],dst^,(LenS2+1)*sizeof(UnicodeChar));
    end;
end;
function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
{
  Returns the concatenation of all strings in sarr, in order.
  The total length is summed first so the result is sized with a single
  SetLength call; empty (nil) elements are skipped while copying.
}
Var
  idx : Longint;
  src : pointer;
  dst : punicodechar;
  PartLen,TotalLen : SizeInt;
begin
  TotalLen:=0;
  for idx:=low(sarr) to high(sarr) do
    inc(TotalLen,length(sarr[idx]));
  SetLength(result,TotalLen);
  dst:=punicodechar(result);
  for idx:=low(sarr) to high(sarr) do
    begin
      src:=pointer(sarr[idx]);
      if src<>nil then
        begin
          PartLen:=length(unicodestring(src));
          { +1 copies the terminator too; the next part overwrites it }
          Move(punicodechar(src)^,dst^,(PartLen+1)*sizeof(UnicodeChar));
          inc(dst,PartLen);
        end;
    end;
end;
- {$else STR_CONCAT_PROCS}
procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
{
  Stores S1+S2 in DestS. When one operand is empty the other one is simply
  assigned. DestS may be the same string as S1 and/or S2; those cases grow
  DestS in place and shuffle the data inside it.
}
Var
  LenS1,LenS2 : SizeInt;
  BothSame : boolean;
  tail : Pointer;
begin
  { only assign if s1 or s2 is empty }
  if (S1='') then
    begin
      DestS:=s2;
      exit;
    end;
  if (S2='') then
    begin
      DestS:=s1;
      exit;
    end;
  LenS1:=Length(S1);
  LenS2:=length(S2);
  { Use Pointer() typecasts to prevent extra conversion code }
  if Pointer(DestS)=Pointer(S1) then
    begin
      { DestS:=DestS+S2 - append behind the existing contents }
      BothSame:=Pointer(S1)=Pointer(S2);
      SetLength(DestS,LenS2+LenS1);
      tail:=Pointer(DestS)+LenS1*sizeof(UnicodeChar);
      if BothSame then
        { DestS:=DestS+DestS - duplicate the first half, no terminator copy }
        Move(Pointer(DestS)^,tail^,(LenS2)*sizeof(UnicodeChar))
      else
        Move(Pointer(S2)^,tail^,(LenS2+1)*sizeof(UnicodeChar));
    end
  else if Pointer(DestS)=Pointer(S2) then
    begin
      { DestS:=S1+DestS - shift the old contents up, then put S1 in front }
      SetLength(DestS,LenS2+LenS1);
      tail:=Pointer(DestS)+LenS1*sizeof(UnicodeChar);
      Move(Pointer(DestS)^,tail^,(LenS2+1)*sizeof(UnicodeChar));
      Move(Pointer(S1)^,Pointer(DestS)^,LenS1*sizeof(UnicodeChar));
    end
  else
    begin
      { DestS is unrelated to both operands - start from scratch }
      DestS:='';
      SetLength(DestS,LenS2+LenS1);
      tail:=Pointer(DestS)+LenS1*sizeof(UnicodeChar);
      Move(Pointer(S1)^,Pointer(DestS)^,LenS1*sizeof(UnicodeChar));
      Move(Pointer(S2)^,tail^,(LenS2+1)*sizeof(UnicodeChar));
    end;
end;
procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
{
  Stores the concatenation of all strings in sarr in DestS.

  If DestS is the first element (and appears nowhere else) it is grown in
  place and only the remaining elements are appended. If DestS appears at
  any later position, an extra reference to the original string is held
  until the copy is done, and DestS is rebuilt from the first element on.

  NOTE(review): when high(sarr)=0, i.e. the array has exactly one element,
  DestS is set to '' rather than to that element - confirm callers never
  pass a single-element array.
}
Var
  idx : Longint;
  src,dst : pointer;
  PartLen,TotalLen : SizeInt;
  FirstIdx : longint;
  KeepAlive : pointer;
  KeptLen : SizeInt;
begin
  if high(sarr)=0 then
    begin
      DestS:='';
      exit;
    end;
  KeepAlive:=nil;
  FirstIdx:=low(sarr);
  { DestS is the first operand: its contents can stay where they are }
  if Pointer(DestS)=Pointer(sarr[FirstIdx]) then
    inc(FirstIdx);
  { Check for another reuse, then we can't use
    the append optimization }
  for idx:=FirstIdx to high(sarr) do
    if Pointer(DestS)=Pointer(sarr[idx]) then
      begin
        { DestS also occurs further on in the expression, so the original
          string has to stay alive while DestS is emptied/modified.
          This trick only works with reference counted strings. }
        KeepAlive:=pointer(dests);
        fpc_UnicodeStr_Incr_Ref(KeepAlive);
        FirstIdx:=low(sarr);
        break;
      end;
  { Start with empty DestS if we start with concatting
    the first array element }
  if FirstIdx=low(sarr) then
    DestS:='';
  KeptLen:=length(DestS);
  { Calculate size of the result so we can do
    a single call to SetLength() }
  TotalLen:=0;
  for idx:=low(sarr) to high(sarr) do
    inc(TotalLen,length(sarr[idx]));
  SetLength(DestS,TotalLen);
  { Concat all strings, except the string we already
    copied in DestS }
  dst:=Pointer(DestS)+KeptLen*sizeof(UnicodeChar);
  for idx:=FirstIdx to high(sarr) do
    begin
      src:=pointer(sarr[idx]);
      if src<>nil then
        begin
          PartLen:=length(unicodestring(src));
          { +1 copies the terminator too; the next part overwrites it }
          Move(src^,dst^,(PartLen+1)*sizeof(UnicodeChar));
          inc(dst,PartLen*sizeof(UnicodeChar));
        end;
    end;
  { drop the extra reference taken above (no-op when KeepAlive is nil) }
  fpc_UnicodeStr_Decr_Ref(KeepAlive);
end;
- {$endif STR_CONCAT_PROCS}
Function fpc_Char_To_UChar(const c : Char): UnicodeChar; compilerproc;
{
  Converts a Char to a UnicodeChar through the installed Ansi->Unicode
  conversion routine.

  If the routine produces no character at all, '?' is returned instead of
  indexing an empty string, in line with the fallback used by
  fpc_UChar_To_Char.
}
var
  w: unicodestring;
begin
  widestringmanager.Ansi2UnicodeMoveProc(@c, w, 1);
  if length(w)>0 then
    fpc_Char_To_UChar:= w[1]
  else
    fpc_Char_To_UChar:='?';
end;
Function fpc_Char_To_UnicodeStr(const c : Char): UnicodeString; compilerproc;
{
  Builds a one-character UnicodeString from a Char.
}
begin
  Setlength(Result,1);
  Result[1]:=c;
  { Terminating Zero }
  PUnicodeChar(Pointer(Result)+sizeof(UnicodeChar))^:=#0;
end;
Function fpc_UChar_To_Char(const c : UnicodeChar): Char; compilerproc;
{
  Converts a UnicodeChar to a Char through the installed Unicode->Ansi
  conversion routine. Returns '?' unless the conversion yields exactly one
  character.
}
var
  converted: ansistring;
begin
  widestringmanager.Unicode2AnsiMoveProc(@c, converted, 1);
  if length(converted)<>1 then
    Result:='?'
  else
    Result:=converted[1];
end;
Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
{
  Builds a one-character UnicodeString from a WideChar.
}
begin
  SetLength(fpc_WChar_To_UnicodeStr,1);
  fpc_WChar_To_UnicodeStr[1]:=c;
end;
Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
{
  Converts a Char to a WideChar through the installed Ansi->Wide conversion
  routine.

  If the routine produces no character at all, '?' is returned instead of
  indexing an empty string, in line with the fallback used by
  fpc_WChar_To_Char.
}
var
  w: widestring;
begin
  widestringmanager.Ansi2WideMoveProc(@c, w, 1);
  if length(w)>0 then
    fpc_Char_To_WChar:= w[1]
  else
    fpc_Char_To_WChar:='?';
end;
Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
{
  Converts a WideChar to a Char through the installed Wide->Ansi conversion
  routine. Returns '?' unless the conversion yields exactly one character.
}
var
  converted: ansistring;
begin
  widestringmanager.Wide2AnsiMoveProc(@c, converted, 1);
  if length(converted)<>1 then
    Result:='?'
  else
    Result:=converted[1];
end;
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
{
  Converts a single WideChar to a ShortString through the installed
  Wide->Ansi conversion routine.
}
var
  converted: ansistring;
begin
  widestringmanager.Wide2AnsiMoveProc(@c, converted, 1);
  Result:=converted;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
{
  Converts a single WideChar to a ShortString through the installed
  Wide->Ansi conversion routine and stores it in res.
}
var
  converted: ansistring;
begin
  widestringmanager.Wide2AnsiMoveProc(@c,converted,1);
  res:=converted;
end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
{
  Builds a one-character UnicodeString from a UnicodeChar.
}
begin
  SetLength(Result,1);
  Result[1]:=c;
end;
Function fpc_UChar_To_AnsiStr(const c : UnicodeChar): AnsiString; compilerproc;
{
  Converts a single UnicodeChar to an AnsiString through the installed
  Unicode->Ansi conversion routine, which fills the result directly.
}
begin
  widestringmanager.Unicode2AnsiMoveProc(@c, Result, 1);
end;
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
{
  Converts a single UnicodeChar to a ShortString through the installed
  Unicode->Ansi conversion routine.
}
var
  converted: ansistring;
begin
  widestringmanager.Unicode2AnsiMoveProc(@c, converted, 1);
  Result:=converted;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
{
  Converts a single UnicodeChar to a ShortString through the installed
  Unicode->Ansi conversion routine and stores it in res.
}
var
  converted: ansistring;
begin
  widestringmanager.Unicode2AnsiMoveProc(@c,converted,1);
  res:=converted;
end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
{
  Converts a #0-terminated Char buffer to a UnicodeString through the
  installed Ansi->Unicode conversion routine. A nil p or an empty buffer
  yields ''.
}
Var
  ByteCount : SizeInt;
begin
  if assigned(p) and (p[0]<>#0) then
    begin
      { position of the terminating #0 = number of bytes to convert }
      ByteCount:=IndexChar(p^,-1,#0);
      widestringmanager.Ansi2UnicodeMoveProc(P,fpc_PChar_To_UnicodeStr,ByteCount);
    end
  else
    fpc_pchar_to_unicodestr := '';
end;
Function fpc_CharArray_To_UnicodeStr(const arr: array of char; zerobased: boolean = true): UnicodeString; compilerproc;
{
  Converts a Char array to a UnicodeString through the installed
  Ansi->Unicode conversion routine.
  With zerobased set, conversion stops at the first #0 in arr (the whole
  array is used when there is none); otherwise the whole array is converted.
}
var
  ByteCount : SizeInt;
begin
  if not zerobased then
    ByteCount:=high(arr)+1
  else
    begin
      if (arr[0]=#0) then
        begin
          fpc_chararray_to_unicodestr := '';
          exit;
        end;
      ByteCount:=IndexChar(arr,high(arr)+1,#0);
      { no terminator found: take everything }
      if ByteCount = -1 then
        ByteCount:=high(arr)+1;
    end;
  SetLength(fpc_CharArray_To_UnicodeStr,ByteCount);
  widestringmanager.Ansi2UnicodeMoveProc (pchar(@arr),fpc_CharArray_To_UnicodeStr,ByteCount);
end;
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
{
  Converts a UnicodeChar array to a ShortString through the installed
  Unicode->Ansi conversion routine.
  At most 255 characters are considered. With zerobased set, conversion
  stops at the first #0 inside that range.
}
var
  limit: longint;
  zeropos: longint;
  len: byte;
  temp: ansistring;
begin
  { clamp the element count to 0..255 }
  limit := high(arr)+1;
  if limit>=256 then
    limit:=255
  else if limit<0 then
    limit:=0;
  len := limit;
  if zerobased then
    begin
      zeropos:=IndexWord(arr[0],limit,0);
      if zeropos>=0 then
        len := zeropos;
    end;
  widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,len);
  fpc_UnicodeCharArray_To_ShortStr := temp;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_UnicodeCharArray_To_ShortStr(out res : shortstring;const arr: array of unicodechar; zerobased: boolean = true);[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
{
  Converts a UnicodeChar array to a ShortString through the installed
  Unicode->Ansi conversion routine and stores it in res.
  At most high(res) characters are considered. With zerobased set,
  conversion stops at the first #0 inside that range.
}
var
  limit: longint;
  zeropos: ptrint;
  len: byte;
  temp: ansistring;
begin
  { clamp the element count to 0..high(res) }
  limit := high(arr)+1;
  if limit>=high(res)+1 then
    limit:=high(res)
  else if limit<0 then
    limit:=0;
  len:=limit;
  if zerobased then
    begin
      zeropos:=IndexWord(arr[0],limit,0);
      if zeropos>=0 then
        len:=zeropos;
    end;
  widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),temp,len);
  res:=temp;
end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; zerobased: boolean = true): AnsiString; compilerproc;
{
  Converts a UnicodeChar array to an AnsiString through the installed
  Unicode->Ansi conversion routine.
  With zerobased set, conversion stops at the first #0 in arr (the whole
  array is used when there is none); otherwise the whole array is converted.
}
var
  CharCount : SizeInt;
begin
  CharCount:=high(arr)+1;
  if zerobased then
    begin
      CharCount:=IndexWord(arr,high(arr)+1,0);
      { no terminator found: take everything }
      if CharCount = -1 then
        CharCount:=high(arr)+1;
    end;
  SetLength(fpc_UnicodeCharArray_To_AnsiStr,CharCount);
  widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,CharCount);
end;
Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
{
  Copies a UnicodeChar array into a new UnicodeString.
  With zerobased set, copying stops at the first #0 in arr (the whole array
  is used when there is none); otherwise the whole array is copied.
}
var
  CharCount : SizeInt;
begin
  CharCount:=high(arr)+1;
  if zerobased then
    begin
      CharCount:=IndexWord(arr,high(arr)+1,0);
      { no terminator found: take everything }
      if CharCount = -1 then
        CharCount:=high(arr)+1;
    end;
  SetLength(fpc_UnicodeCharArray_To_UnicodeStr,CharCount);
  Move(arr[0], Pointer(fpc_UnicodeCharArray_To_UnicodeStr)^,CharCount*sizeof(UnicodeChar));
end;
Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
{
  Copies a WideChar array into a new UnicodeString.
  With zerobased set, copying stops at the first #0 in arr (the whole array
  is used when there is none); otherwise the whole array is copied.
}
var
  CharCount : SizeInt;
begin
  CharCount:=high(arr)+1;
  if zerobased then
    begin
      CharCount:=IndexWord(arr,high(arr)+1,0);
      { no terminator found: take everything }
      if CharCount = -1 then
        CharCount:=high(arr)+1;
    end;
  SetLength(fpc_WideCharArray_To_UnicodeStr,CharCount);
  Move(arr[0], Pointer(fpc_WideCharArray_To_UnicodeStr)^,CharCount*sizeof(WideChar));
end;
- { due to their names, the following procedures should be in wstrings.inc,
- however, the compiler generates code using this functions on all platforms }
- {$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
{
  Converts a WideChar array to a ShortString through the installed
  Wide->Ansi conversion routine.
  At most 255 characters are considered. With zerobased set, conversion
  stops at the first #0 inside that range.
}
var
  limit: longint;
  zeropos: longint;
  len: byte;
  temp: ansistring;
begin
  { clamp the element count to 0..255 }
  limit := high(arr)+1;
  if limit>=256 then
    limit:=255
  else if limit<0 then
    limit:=0;
  len := limit;
  if zerobased then
    begin
      zeropos:=IndexWord(arr[0],limit,0);
      if zeropos>=0 then
        len := zeropos;
    end;
  widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
  fpc_WideCharArray_To_ShortStr := temp;
end;
- {$else FPC_STRTOSHORTSTRINGPROC}
- procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
- { Converts an open array of WideChar into res. At most high(res) elements
-   are considered; with zerobased set, conversion stops at the first #0 found
-   within that range. The text is converted through Wide2AnsiMoveProc. }
- var
-   avail: longint;
-   stop: ptrint;
-   count: byte;
-   converted: ansistring;
- begin
-   { clamp the element count to the capacity of res }
-   avail:=high(arr)+1;
-   if avail<0 then
-     avail:=0
-   else if avail>high(res) then
-     avail:=high(res);
-   count:=avail;
-   if zerobased then
-     begin
-       stop:=IndexWord(arr[0],avail,0);
-       if stop>=0 then
-         count:=stop;
-     end;
-   widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),converted,count);
-   res:=converted;
- end;
- {$endif FPC_STRTOSHORTSTRINGPROC}
- Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; zerobased: boolean = true): AnsiString; compilerproc;
- { Converts an open array of WideChar to an AnsiString through the
-   Wide2AnsiMoveProc hook of the widestring manager. With zerobased set,
-   conversion stops at the first #0 element; otherwise every element is used. }
- var
-   count, stop : SizeInt;
- begin
-   count:=high(arr)+1;
-   if zerobased then
-     begin
-       stop:=IndexWord(arr,count,0);
-       if stop<>-1 then
-         count:=stop;
-     end;
-   SetLength(Result,count);
-   widestringmanager.Wide2AnsiMoveProc(pwidechar(@arr),Result,count);
- end;
- Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
- { Copies an open array of WideChar into a WideString. With zerobased set,
-   only the elements before the first #0 are copied; otherwise the whole
-   array is copied. }
- var
-   count, stop : SizeInt;
- begin
-   count:=high(arr)+1;
-   if zerobased then
-     begin
-       stop:=IndexWord(arr,count,0);
-       if stop<>-1 then
-         count:=stop;
-     end;
-   SetLength(Result,count);
-   Move(arr[0],Pointer(Result)^,count*sizeof(WideChar));
- end;
- {$ifndef FPC_STRTOCHARARRAYPROC}
- { inside the compiler, the resulttype is modified to that of the actual }
- { chararray we're converting to (JM) }
- function fpc_unicodestr_to_chararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_chararray;[public,alias: 'FPC_UNICODESTR_TO_CHARARRAY']; compilerproc;
- { Converts src to ansi through the widestring manager and stores at most
-   arraysize characters in the result; the remaining elements are set to 0. }
- var
-   copied: SizeInt;
-   converted: ansistring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>arraysize then
-     copied:=arraysize;
- {$r-}
-   move(converted[1],fpc_unicodestr_to_chararray[0],copied);
-   fillchar(fpc_unicodestr_to_chararray[copied],arraysize-copied,0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- { inside the compiler, the resulttype is modified to that of the actual }
- { unicodechararray we're converting to (JM) }
- function fpc_unicodestr_to_unicodechararray(arraysize: SizeInt; const src: UnicodeString): fpc_big_unicodechararray;[public,alias: 'FPC_UNICODESTR_TO_UNICODECHARARRAY']; compilerproc;
- { Copies at most arraysize characters of src into the result and sets the
-   remaining elements to 0. }
- var
-   copied: SizeInt;
- begin
-   copied:=length(src);
-   if copied>arraysize then
-     copied:=arraysize;
- {$r-}
-   { src[1] is only addressed when there is something to copy }
-   if copied>0 then
-     move(src[1],fpc_unicodestr_to_unicodechararray[0],copied*SizeOf(UnicodeChar));
-   fillchar(fpc_unicodestr_to_unicodechararray[copied],(arraysize-copied)*SizeOf(UnicodeChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- { inside the compiler, the resulttype is modified to that of the actual }
- { unicodechararray we're converting to (JM) }
- function fpc_ansistr_to_unicodechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_unicodechararray;[public,alias: 'FPC_ANSISTR_TO_UNICODECHARARRAY']; compilerproc;
- { Converts src to unicode through the widestring manager and stores at most
-   arraysize characters in the result; the remaining elements are set to 0. }
- var
-   copied: SizeInt;
-   converted: unicodestring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>arraysize then
-     copied:=arraysize;
- {$r-}
-   move(converted[1],fpc_ansistr_to_unicodechararray[0],copied*sizeof(unicodechar));
-   fillchar(fpc_ansistr_to_unicodechararray[copied],(arraysize-copied)*SizeOf(UnicodeChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- function fpc_shortstr_to_unicodechararray(arraysize: SizeInt; const src: ShortString): fpc_big_unicodechararray;[public,alias: 'FPC_SHORTSTR_TO_UNICODECHARARRAY']; compilerproc;
- { Converts src to unicode through the widestring manager and stores at most
-   arraysize characters in the result; the remaining elements are set to 0. }
- var
-   copied: longint;
-   converted : unicodestring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>arraysize then
-     copied:=arraysize;
- {$r-}
-   move(converted[1],fpc_shortstr_to_unicodechararray[0],copied*sizeof(unicodechar));
-   fillchar(fpc_shortstr_to_unicodechararray[copied],(arraysize-copied)*SizeOf(UnicodeChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- {$else ndef FPC_STRTOCHARARRAYPROC}
- procedure fpc_unicodestr_to_chararray(out res: array of char; const src: UnicodeString); compilerproc;
- { Converts src to ansi through the widestring manager and stores at most
-   length(res) characters in res; the remaining elements are set to 0. }
- var
-   copied: SizeInt;
-   converted: ansistring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.unicode2ansimoveproc(punicodechar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   move(converted[1],res[0],copied);
-   fillchar(res[copied],length(res)-copied,0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
- { Copies at most length(res) characters of src into res and sets the
-   remaining elements to 0. }
- var
-   copied: SizeInt;
- begin
-   copied:=length(src);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   { src[1] is only addressed when there is something to copy }
-   if copied>0 then
-     move(src[1],res[0],copied*SizeOf(UnicodeChar));
-   fillchar(res[copied],(length(res)-copied)*SizeOf(UnicodeChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
- { Converts src to unicode through the widestring manager and stores at most
-   length(res) characters in res; the remaining elements are set to 0. }
- var
-   copied: SizeInt;
-   converted: unicodestring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   move(converted[1],res[0],copied*sizeof(unicodechar));
-   fillchar(res[copied],(length(res)-copied)*SizeOf(UnicodeChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
- { Converts src to unicode through the widestring manager and stores at most
-   length(res) characters in res; the remaining elements are set to 0. }
- var
-   copied: longint;
-   converted : unicodestring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   move(converted[1],res[0],copied*sizeof(unicodechar));
-   fillchar(res[copied],(length(res)-copied)*SizeOf(UnicodeChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: AnsiString); compilerproc;
- { Converts src to a widestring through the widestring manager and stores at
-   most length(res) characters in res; the remaining elements are set to 0. }
- var
-   copied: SizeInt;
-   converted: widestring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.ansi2widemoveproc(pchar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   move(converted[1],res[0],copied*sizeof(widechar));
-   fillchar(res[copied],(length(res)-copied)*SizeOf(WideChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
- { Converts src to a widestring through the widestring manager and stores at
-   most length(res) characters in res; the remaining elements are set to 0. }
- var
-   copied: longint;
-   converted : widestring;
- begin
-   { src[1] is only addressed when src is not empty }
-   if length(src)>0 then
-     widestringmanager.ansi2widemoveproc(pchar(@src[1]),converted,length(src));
-   copied:=length(converted);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   move(converted[1],res[0],copied*sizeof(widechar));
-   fillchar(res[copied],(length(res)-copied)*SizeOf(WideChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
- { Copies at most length(res) characters of src into res and sets the
-   remaining elements to 0. }
- var
-   copied: SizeInt;
- begin
-   copied:=length(src);
-   if copied>length(res) then
-     copied:=length(res);
- {$r-}
-   { src[1] is only addressed when there is something to copy }
-   if copied>0 then
-     move(src[1],res[0],copied*SizeOf(WideChar));
-   fillchar(res[copied],(length(res)-copied)*SizeOf(WideChar),0);
- {$ifdef RangeCheckWasOn}
- {$r+}
- {$endif}
- end;
- {$endif ndef FPC_STRTOCHARARRAYPROC}
- Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE']; compilerproc;
- {
-   Three-way comparison of two UnicodeStrings.
-   Result:
-    <0 if S1<S2
-     0 if S1=S2
-    >0 if S1>S2
-   The common prefix is compared with CompareWord; when it is equal the
-   difference of the lengths decides.
- }
- Var
-   Len1,Len2,Common,Outcome : SizeInt;
- begin
-   { same string data: equal without looking at the contents }
-   if pointer(S1)=pointer(S2) then
-     exit(0);
-   Len1:=Length(S1);
-   Len2:=Length(S2);
-   if Len1<Len2 then
-     Common:=Len1
-   else
-     Common:=Len2;
-   Outcome:=CompareWord(S1[1],S2[1],Common);
-   if Outcome=0 then
-     Outcome:=Len1-Len2;
-   fpc_UnicodeStr_Compare:=Outcome;
- end;
- Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt;[Public,Alias : 'FPC_UNICODESTR_COMPARE_EQUAL']; compilerproc;
- {
-   Equality-only comparison of two UnicodeStrings.
-   Result:
-     0 if S1=S2
-   <>0 if S1<>S2
-   Strings of different length yield -1 without comparing the contents.
- }
- Var
-   Len1 : SizeInt;
- begin
-   if pointer(S1)=pointer(S2) then
-     exit(0);
-   Len1:=Length(S1);
-   if Len1=Length(S2) then
-     fpc_UnicodeStr_Compare_Equal:=CompareWord(S1[1],S2[1],Len1)
-   else
-     fpc_UnicodeStr_Compare_Equal:=-1;
- end;
- Procedure fpc_UnicodeStr_CheckZero(p : pointer);[Public,Alias : 'FPC_UNICODESTR_CHECKZERO']; compilerproc;
- { Reports error 201 through HandleErrorFrame when p is nil. }
- begin
-   if not assigned(p) then
-     HandleErrorFrame(201,get_frame);
- end;
- Procedure fpc_UnicodeStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_UNICODESTR_RANGECHECK']; compilerproc;
- { Reports error 201 through HandleErrorFrame unless index lies within
-   1..(len div 2). }
- begin
-   if not ((index>=1) and (index<=len div 2)) then
-     HandleErrorFrame(201,get_frame);
- end;
- Procedure fpc_UnicodeStr_SetLength(Var S : UnicodeString; l : SizeInt);[Public,Alias : 'FPC_UNICODESTR_SETLENGTH']; compilerproc;
- {
-   Sets the length of S to l characters.
-   l<=0 releases the string. Otherwise S ends up with a reference count of 1
-   and room for l characters: a nil string is allocated, a string with a
-   reference count of 1 is grown in place when its memory block is too small,
-   and any other string is replaced by a fresh copy.
- }
- Var
-   Fresh   : Pointer;
-   CopyLen : SizeInt;
- begin
-   if l<=0 then
-     begin
-       { Length=0 }
-       if Pointer(S)<>nil then
-         fpc_unicodestr_decr_ref(Pointer(S));
-       Pointer(S):=nil;
-       exit;
-     end;
-   if Pointer(S)=nil then
-     { need a completely new string }
-     Pointer(S):=NewUnicodeString(l)
-   else if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref=1 then
-     begin
-       { sole owner: step back to the start of the record, grow the block
-         if needed, then point at the first character again }
-       Dec(Pointer(S),UnicodeFirstOff);
-       if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
-         reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
-       Inc(Pointer(S), UnicodeFirstOff);
-     end
-   else
-     begin
-       { string has another reference count: build a new one }
-       Fresh:=Pointer(NewUnicodeString(L));
-       if Length(S)>0 then
-         begin
-           { old length plus terminating null, but never more than l }
-           CopyLen:=succ(length(s));
-           if l<CopyLen then
-             CopyLen:=l;
-           Move(Pointer(S)^,Fresh^,CopyLen*Sizeof(UnicodeChar));
-         end;
-       fpc_unicodestr_decr_ref(Pointer(S));
-       Pointer(S):=Fresh;
-     end;
-   { force null termination in case the string got shorter }
-   PWord(Pointer(S)+l*sizeof(UnicodeChar))^:=0;
-   { the record stores the length in bytes }
-   PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len:=l*sizeof(UnicodeChar);
- end;
- {*****************************************************************************
- Public functions, In interface.
- *****************************************************************************}
- function UnicodeCharToString(S : PUnicodeChar) : AnsiString;
- { Converts the null-terminated unicode text at S to an AnsiString. The
-   character count is taken from a temporary UnicodeString built from S. }
- var
-   Chars : SizeInt;
- begin
-   Chars:=Length(UnicodeString(S));
-   result:=UnicodeCharLenToString(S,Chars);
- end;
- function StringToUnicodeChar(const Src : AnsiString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
- { Converts Src to unicode through the widestring manager and stores it as a
-   null-terminated string in Dest.
-   DestSize is the capacity of Dest in characters including the terminator;
-   longer text is truncated to DestSize-1 characters. Returns Dest.
-   Nothing is written when Dest is nil or DestSize<=0. }
- var
-   temp : unicodestring;
-   count : SizeInt;
- begin
-   result:=Dest;
-   { no room for even the terminator: avoid a negative move count and a
-     write in front of the buffer }
-   if (Dest=nil) or (DestSize<=0) then
-     exit;
-   widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),temp,Length(Src));
-   count:=Length(temp);
-   if count>DestSize-1 then
-     count:=DestSize-1;
-   if count>0 then
-     move(temp[1],Dest^,count*SizeOf(UnicodeChar));
-   { terminate directly behind the copied text, not at the end of the buffer }
-   Dest[count]:=#0;
- end;
- function WideCharToString(S : PWideChar) : AnsiString;
- { Converts the null-terminated wide text at S to an AnsiString. The
-   character count is taken from a temporary WideString built from S. }
- var
-   Chars : SizeInt;
- begin
-   Chars:=Length(WideString(S));
-   result:=WideCharLenToString(S,Chars);
- end;
- function StringToWideChar(const Src : AnsiString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
- { Converts Src to a widestring through the widestring manager and stores it
-   as a null-terminated string in Dest.
-   DestSize is the capacity of Dest in characters including the terminator;
-   longer text is truncated to DestSize-1 characters. Returns Dest.
-   Nothing is written when Dest is nil or DestSize<=0. }
- var
-   temp : widestring;
-   count : SizeInt;
- begin
-   result:=Dest;
-   { no room for even the terminator: avoid a negative move count and a
-     write in front of the buffer }
-   if (Dest=nil) or (DestSize<=0) then
-     exit;
-   widestringmanager.Ansi2WideMoveProc(PChar(Src),temp,Length(Src));
-   count:=Length(temp);
-   if count>DestSize-1 then
-     count:=DestSize-1;
-   if count>0 then
-     move(temp[1],Dest^,count*SizeOf(WideChar));
-   { terminate directly behind the copied text, not at the end of the buffer }
-   Dest[count]:=#0;
- end;
- function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : AnsiString;
- { Converts Len unicode characters starting at S to an AnsiString through the
-   Unicode2AnsiMoveProc hook of the widestring manager. }
- begin
-   { the result is handed to the move proc without a prior SetLength }
-   widestringmanager.Unicode2AnsiMoveProc(S,Result,Len);
- end;
- procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
- { Procedure form of UnicodeCharLenToString: stores the converted text in Dest. }
- begin
-   Dest:=UnicodeCharLenToString(Src,Len);
- end;
- procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
- { Procedure form of UnicodeCharToString: stores the converted text in Dest. }
- begin
-   Dest:=UnicodeCharToString(S);
- end;
- function WideCharLenToString(S : PWideChar;Len : SizeInt) : AnsiString;
- { Converts Len wide characters starting at S to an AnsiString through the
-   Wide2AnsiMoveProc hook of the widestring manager. }
- begin
-   { the result is handed to the move proc without a prior SetLength }
-   widestringmanager.Wide2AnsiMoveProc(S,Result,Len);
- end;
- procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
- { Procedure form of WideCharLenToString: stores the converted text in Dest. }
- begin
-   Dest:=WideCharLenToString(Src,Len);
- end;
- procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
- { Procedure form of WideCharToString: stores the converted text in Dest. }
- begin
-   Dest:=WideCharToString(S);
- end;
- Function fpc_unicodestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_UNICODESTR_UNIQUE']; compilerproc;
- {
-   Makes sure the reference count of S is 1.
-   A nil string or one whose reference count already is 1 is returned as is.
-   Otherwise the characters (including the terminating null) are copied into
-   a new string, the old one is released, and S and the result both point to
-   the copy.
- }
- Var
-   Header : PUnicodeRec;
-   Dup    : Pointer;
-   Chars  : SizeInt;
- begin
-   pointer(result):=pointer(S);
-   if Pointer(S)=nil then
-     exit;
-   Header:=PUnicodeRec(Pointer(S)-UnicodeFirstOff);
-   if Header^.Ref=1 then
-     exit;
-   { the record stores the length in bytes }
-   Chars:=Header^.len div sizeof(UnicodeChar);
-   Dup:=NewUnicodeString(Chars);
-   Move(PUnicodeChar(S)^,Dup^,(Chars+1)*sizeof(UnicodeChar));
-   PUnicodeRec(Dup-UnicodeFirstOff)^.len:=Chars*sizeof(UnicodeChar);
-   fpc_unicodestr_decr_ref(Pointer(S));
-   pointer(S):=Dup;
-   pointer(result):=Dup;
- end;
- Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
- { Returns up to Size characters of S starting at the 1-based position Index.
-   An Index below 1 is treated as 1, and Size is reduced to what is available
-   behind Index; when nothing remains the result is the empty string. }
- var
-   Fresh : Pointer;
- begin
-   Fresh:=nil;
-   { work with a zero-based offset from here on }
-   dec(Index);
-   if Index<0 then
-     Index:=0;
-   { both tests are needed: Size may be maxint, in which case Index+Size
-     turns negative }
-   if (Size>Length(S)) or
-      (Index+Size>Length(S)) then
-     Size:=Length(S)-Index;
-   if Size>0 then
-     begin
-       Fresh:=Pointer(NewUnicodeString(Size));
-       if Fresh<>nil then
-         begin
-           Move(PUnicodeChar(S)[Index],Fresh^,Size*sizeof(UnicodeChar));
-           PUnicodeRec(Fresh-UnicodeFirstOff)^.Len:=Size*sizeof(UnicodeChar);
-           PUnicodeChar(Fresh+Size*sizeof(UnicodeChar))^:=#0;
-         end;
-     end;
-   { release whatever the result held before installing the new data }
-   fpc_unicodestr_decr_ref(Pointer(Result));
-   Pointer(Result):=Fresh;
- end;
- Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
- { Returns the 1-based position of the first occurrence of Substr in Source,
-   or 0 when Substr is empty or does not occur. }
- var
-   i,SubLen,LastStart : SizeInt;
-   Cur : punicodechar;
- begin
-   Pos:=0;
-   SubLen:=Length(Substr);
-   if SubLen=0 then
-     exit;
-   { last position at which Substr still fits into Source }
-   LastStart:=Length(Source)-SubLen+1;
-   Cur:=@Source[1];
-   for i:=1 to LastStart do
-     begin
-       { cheap first-character test before comparing the whole needle }
-       if (Substr[1]=Cur^) and
-          (CompareWord(Substr[1],Cur^,SubLen)=0) then
-         exit(i);
-       inc(Cur);
-     end;
- end;
- { Faster version for a unicodechar alone }
- Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
- { Returns the 1-based position of the first occurrence of c in s, or 0 when
-   c does not occur. }
- var
-   i : SizeInt;
-   First : punicodechar;
- begin
-   Pos:=0;
-   First:=@s[1];
-   for i:=0 to length(s)-1 do
-     if First[i]=c then
-       exit(i+1);
- end;
- Function Pos (c : AnsiString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- { Converts the ansi needle to a UnicodeString and searches s for it. }
- var
-   Needle : UnicodeString;
- begin
-   Needle:=UnicodeString(c);
-   result:=Pos(Needle,s);
- end;
- Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- { Converts the shortstring needle to a UnicodeString and searches s for it. }
- var
-   Needle : UnicodeString;
- begin
-   Needle:=UnicodeString(c);
-   result:=Pos(Needle,s);
- end;
- Function Pos (c : UnicodeString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- { Converts the ansi haystack to a UnicodeString and searches it for c. }
- var
-   Haystack : UnicodeString;
- begin
-   Haystack:=UnicodeString(s);
-   result:=Pos(c,Haystack);
- end;
- { Faster version for a char alone. Must be implemented because }
- { pos(c: char; const s: shortstring) also exists, so otherwise }
- { using pos(char,pchar) will always call the shortstring version }
- { (exact match for first argument), also with $h+ (JM) }
- Function Pos (c : Char; Const s : UnicodeString) : SizeInt;
- { Returns the 1-based position of the first character of s that equals c
-   (after c has been converted to a UnicodeChar), or 0 when there is none. }
- var
-   i : SizeInt;
-   Wanted : unicodechar;
-   First : punicodechar;
- begin
-   Pos:=0;
-   Wanted:=c;
-   First:=@s[1];
-   for i:=0 to length(s)-1 do
-     if First[i]=Wanted then
-       exit(i+1);
- end;
- Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
- { Removes Size characters from S starting at the 1-based position Index.
-   Nothing happens when S is empty, Index is outside 1..Length(S) or Size<=0.
-   A Size reaching past the end of S (including very large values such as
-   High(SizeInt)) removes everything from Index onwards. }
- Var
-   LS : SizeInt;
- begin
-   If Length(S)=0 then
-     exit;
-   if index<=0 then
-     exit;
-   { the record stores the length in bytes }
-   LS:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Len div sizeof(UnicodeChar);
-   if (Index<=LS) and (Size>0) then
-     begin
-       UniqueString (S);
-       { compare against LS-Index instead of computing Size+Index: the sum
-         wraps around for a large Size, which skipped the clamp and made the
-         Move below run with bogus offsets }
-       if Size>LS-Index then
-         Size:=LS-Index+1;
-       if Size<=LS-Index then
-         begin
-           Dec(Index);
-           { shift the tail, including the terminating null, to the left }
-           Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
-         end;
-       Setlength(s,LS-Size);
-     end;
- end;
- Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
- { Inserts Source into S in front of the 1-based position Index. An Index
-   below 1 inserts at the start, an Index past the end appends. Nothing
-   happens when Source is empty. }
- var
-   Merged : UnicodeString;
-   SrcLen,OldLen : SizeInt;
- begin
-   SrcLen:=Length(Source);
-   if SrcLen=0 then
-     exit;
-   OldLen:=Length(S);
-   { clamp Index to 1..OldLen+1, then make it a zero-based offset }
-   if Index<=0 then
-     Index:=1;
-   if Index>OldLen then
-     Index:=OldLen+1;
-   Dec(Index);
-   Pointer(Merged):=NewUnicodeString(SrcLen+OldLen);
-   SetLength(Merged,SrcLen+OldLen);
-   { head of S, then Source, then the tail of S }
-   if Index>0 then
-     Move(PUnicodeChar(S)^,PUnicodeChar(Merged)^,Index*sizeof(UnicodeChar));
-   Move(PUnicodeChar(Source)^,PUnicodeChar(Merged)[Index],SrcLen*sizeof(UnicodeChar));
-   if OldLen>Index then
-     Move(PUnicodeChar(S)[Index],PUnicodeChar(Merged)[SrcLen+Index],(OldLen-Index)*sizeof(UnicodeChar));
-   S:=Merged;
- end;
- Function UpCase(c:UnicodeChar):UnicodeChar;
- { Upper-cases c by running a one-character string through the
-   UpperUnicodeStringProc hook of the widestring manager and returning the
-   first character of the outcome. }
- var
-   Single,Upper : UnicodeString;
- begin
-   Single:=c;
-   Upper:=widestringmanager.UpperUnicodeStringProc(Single);
-   result:=Upper[1];
- end;
- function UpCase(const s : UnicodeString) : UnicodeString;
- { Returns s upper-cased by the UpperUnicodeStringProc hook of the
-   widestring manager. }
- begin
-   UpCase:=widestringmanager.UpperUnicodeStringProc(s);
- end;
- Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
- { Sets S to a string of Len characters and, when Buf is not nil and Len is
-   positive, fills it with Len characters copied from Buf. }
- begin
-   SetLength(S,Len);
-   if (Len>0) and assigned(Buf) then
-     Move(Buf^,S[1],Len*sizeof(UnicodeChar));
- end;
- Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
- { Sets S to a string of Len characters and, when Buf is not nil and Len is
-   positive, replaces it with the unicode conversion of the Len ansi
-   characters at Buf (done by the Ansi2UnicodeMoveProc hook). }
- begin
-   SetLength(S,Len);
-   If (Buf<>Nil) and (Len>0) then
-     widestringmanager.Ansi2UnicodeMoveProc(Buf,S,Len);
- end;
- {$ifndef FPUNONE}
- Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
- { Val for floating point values on a UnicodeString: S is converted to a
-   short string and handed to Val. Strings longer than 255 characters are
-   rejected with Code=256 and a result of 0. }
- Var
-   Short : String;
- begin
-   fpc_Val_Real_UnicodeStr:=0;
-   if length(S)<=255 then
-     begin
-       Short:=S;
-       Val(Short,fpc_Val_Real_UnicodeStr,Code);
-     end
-   else
-     Code:=256;
- end;
- {$endif}
- function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
- { Val for enumeration values on a UnicodeString: s is converted to a
-   shortstring and handed to Val. Strings longer than 255 characters are
-   rejected with code=256 and a result of 0. }
- var ss:shortstring;
- begin
-   { defined result for the rejection path, like the other Val helpers here }
-   fpc_val_enum_unicodestr:=0;
-   if length(s)>255 then
-     code:=256
-   else
-     begin
-       ss:=s;
-       val(ss,fpc_val_enum_unicodestr,code);
-     end;
- end;
- Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
- { Val for Currency values on a UnicodeString: S is converted to a short
-   string and handed to Val. Strings longer than 255 characters are rejected
-   with Code=256 and a result of 0. }
- Var
-   Short : String;
- begin
-   if length(S)<=255 then
-     begin
-       Short:=S;
-       Val(Short,fpc_Val_Currency_UnicodeStr,Code);
-     end
-   else
-     begin
-       fpc_Val_Currency_UnicodeStr:=0;
-       Code:=256;
-     end;
- end;
- Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
- { Val for unsigned integers on a UnicodeString: S is converted to a
-   shortstring and handed to Val. Strings longer than 255 characters are
-   rejected with Code=256 and a result of 0. }
- Var
-   Short : ShortString;
- begin
-   fpc_Val_UInt_UnicodeStr:=0;
-   if length(S)<=255 then
-     begin
-       Short:=S;
-       Val(Short,fpc_Val_UInt_UnicodeStr,Code);
-     end
-   else
-     Code:=256;
- end;
- Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
- { Val for signed integers on a UnicodeString: S is converted to a
-   shortstring and handed to int_Val_SInt_ShortStr together with DestSize.
-   Strings longer than 255 characters are rejected with Code=256 and a
-   result of 0. }
- Var
-   Short : ShortString;
- begin
-   fpc_Val_SInt_UnicodeStr:=0;
-   if length(S)<=255 then
-     begin
-       Short:=S;
-       fpc_Val_SInt_UnicodeStr:=int_Val_SInt_ShortStr(DestSize,Short,Code);
-     end
-   else
-     Code:=256;
- end;
- {$ifndef CPU64}
- Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
- { Val for qword values on a UnicodeString: S is converted to a shortstring
-   and handed to Val. Strings longer than 255 characters are rejected with
-   Code=256 and a result of 0. }
- Var
-   Short : ShortString;
- begin
-   fpc_Val_qword_UnicodeStr:=0;
-   if length(S)<=255 then
-     begin
-       Short:=S;
-       Val(Short,fpc_Val_qword_UnicodeStr,Code);
-     end
-   else
-     Code:=256;
- end;
- Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
- { Val for Int64 values on a UnicodeString: S is converted to a shortstring
-   and handed to Val. Strings longer than 255 characters are rejected with
-   Code=256 and a result of 0. }
- Var
-   Short : ShortString;
- begin
-   fpc_Val_int64_UnicodeStr:=0;
-   if length(S)<=255 then
-     begin
-       Short:=S;
-       Val(Short,fpc_Val_int64_UnicodeStr,Code);
-     end
-   else
-     Code:=256;
- end;
- {$endif CPU64}
- {$ifndef FPUNONE}
- procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
- { Formats d with str_real (width len, fraction fr, real type rt) into a
-   shortstring buffer and assigns the text to s. }
- var
-   Buffer : shortstring;
- begin
-   str_real(len,fr,d,treal_type(rt),Buffer);
-   s:=Buffer;
- end;
- {$endif}
- procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
- { Lets fpc_shortstr_enum produce the text into a shortstring buffer and
-   assigns that text to s. }
- var
-   Buffer : shortstring;
- begin
-   fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Buffer);
-   s:=Buffer;
- end;
- {$ifdef FPC_HAS_STR_CURRENCY}
- procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
- { Formats c with Str (width len, fraction fr) into a shortstring buffer and
-   assigns the text to s. }
- var
-   Buffer : shortstring;
- begin
-   str(c:len:fr,Buffer);
-   s:=Buffer;
- end;
- {$endif FPC_HAS_STR_CURRENCY}
- Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
- { Formats v with Str (width Len) into a shortstring buffer and assigns the
-   text to S. }
- Var
-   Buffer : ShortString;
- begin
-   Str(v:Len,Buffer);
-   S:=Buffer;
- end;
- Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
- { Formats v with Str (width Len) into a shortstring buffer and assigns the
-   text to S. }
- Var
-   Buffer : ShortString;
- begin
-   Str(v:Len,Buffer);
-   S:=Buffer;
- end;
- {$ifndef CPU64}
- Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
- { Formats v with Str (width Len) into a shortstring buffer and assigns the
-   text to S. }
- Var
-   Buffer : ShortString;
- begin
-   Str(v:Len,Buffer);
-   S:=Buffer;
- end;
- Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
- { Formats v with Str (width Len) into a shortstring buffer and assigns the
-   text to S. }
- Var
-   Buffer : ShortString;
- begin
-   Str(v:Len,Buffer);
-   S:=Buffer;
- end;
- {$endif CPU64}
- { converts an utf-16 code point or surrogate pair to utf-32 }
- function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
- { Decodes the UTF-16 sequence starting at S[index].
-   A high surrogate (#$D800..#$DBFF) directly followed by a low surrogate
-   (#$DC00..#$DFFF) is combined into one UTF-32 value and len is set to 2.
-   Everything else, including an unpaired surrogate, is returned unchanged
-   with len set to 1. }
- var
-   Lead: unicodechar;
- begin
-   Lead:=S[index];
-   if (Lead>=#$d800) and (Lead<=#$dbff) and
-      { a pair needs a following character }
-      (index<length(S)) and
-      (S[index+1]>=#$dc00) and
-      (S[index+1]<=#$dfff) then
-     begin
-       { convert the surrogate pair to UTF-32 }
-       result:=(UCS4Char(Lead)-$d800) shl 10 + (UCS4Char(S[index+1])-$dc00) + $10000;
-       len:=2;
-     end
-   else
-     begin
-       { plain code unit or invalid surrogate: passed through as is }
-       result:=UCS4Char(Lead);
-       len:=1;
-     end;
- end;
- function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- { Variant for null-terminated input: the source length is found with
-   IndexWord and the work is done by the four-argument UnicodeToUtf8.
-   Returns 0 when Source is nil. }
- begin
-   Result:=0;
-   if assigned(Source) then
-     Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
- end;
- function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
- { Encodes SourceChars UTF-16 code units from Source as UTF-8.
-   With Dest assigned, at most MaxDestBytes bytes including a terminating #0
-   are written; encoding stops in front of the first sequence that does not
-   fit. With Dest=nil nothing is written and only the size is computed.
-   Unpaired surrogates produce no output.
-   Returns the number of bytes produced plus one for the terminator, or 0
-   when Source is nil. With MaxDestBytes=0 nothing is written to Dest. }
- var
-   i,j : SizeUInt;
-   w : word;
-   lw : longword;
-   len : longint;
- begin
-   result:=0;
-   if source=nil then
-     exit;
-   i:=0;
-   j:=0;
-   if assigned(Dest) then
-     begin
-       while (i<SourceChars) and (j<MaxDestBytes) do
-         begin
-           w:=word(Source[i]);
-           case w of
-             0..$7f:
-               begin
-                 Dest[j]:=char(w);
-                 inc(j);
-               end;
-             $80..$7ff:
-               begin
-                 if j+1>=MaxDestBytes then
-                   break;
-                 Dest[j]:=char($c0 or (w shr 6));
-                 Dest[j+1]:=char($80 or (w and $3f));
-                 inc(j,2);
-               end;
-             $800..$d7ff,$e000..$ffff:
-               begin
-                 if j+2>=MaxDestBytes then
-                   break;
-                 Dest[j]:=char($e0 or (w shr 12));
-                 Dest[j+1]:=char($80 or ((w shr 6) and $3f));
-                 Dest[j+2]:=char($80 or (w and $3f));
-                 inc(j,3);
-               end;
-             $d800..$dbff:
-               {High Surrogates}
-               begin
-                 if j+3>=MaxDestBytes then
-                   break;
-                 if (i<sourcechars-1) and
-                    (word(Source[i+1]) >= $dc00) and
-                    (word(Source[i+1]) <= $dfff) then
-                   begin
-                     lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
-                     Dest[j]:=char($f0 or (lw shr 18));
-                     Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
-                     Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
-                     Dest[j+3]:=char($80 or (lw and $3f));
-                     inc(j,4);
-                     { the low surrogate has been consumed as well }
-                     inc(i);
-                   end;
-               end;
-           end;
-           inc(i);
-         end;
-       { MaxDestBytes is unsigned: for 0 the expression MaxDestBytes-1 wraps
-         around, the clamp never triggers and a terminator would be written
-         into a buffer that has no room for it }
-       if MaxDestBytes>0 then
-         begin
-           if j>MaxDestBytes-1 then
-             j:=MaxDestBytes-1;
-           Dest[j]:=#0;
-         end;
-     end
-   else
-     begin
-       { size computation only }
-       while i<SourceChars do
-         begin
-           case word(Source[i]) of
-             $0..$7f:
-               inc(j);
-             $80..$7ff:
-               inc(j,2);
-             $800..$d7ff,$e000..$ffff:
-               inc(j,3);
-             $d800..$dbff:
-               begin
-                 if (i<sourcechars-1) and
-                    (word(Source[i+1]) >= $dc00) and
-                    (word(Source[i+1]) <= $dfff) then
-                   begin
-                     inc(j,4);
-                     inc(i);
-                   end;
-               end;
-           end;
-           inc(i);
-         end;
-     end;
-   result:=j+1;
- end;
- function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- { Variant for null-terminated input: the source length is found with strlen
-   and the work is done by the four-argument Utf8ToUnicode.
-   Returns 0 when Source is nil. }
- begin
-   Result:=0;
-   if assigned(Source) then
-     Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
- end;
- function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
- { Decodes SourceBytes bytes of UTF-8 from Source into UTF-16.
-   With Dest assigned, at most MaxDestChars code units are stored; with
-   Dest=nil nothing is stored and only the size is computed. No terminating
-   null is written.
-   Invalid, overlong or truncated sequences are replaced by '?'
-   (UNICODE_INVALID). Code points above $FFFF become a surrogate pair; a pair
-   that no longer fits into Dest is skipped.
-   Returns the number of code units produced plus one, or 0 when Source is
-   nil. }
- const
-   UNICODE_INVALID=63;
- var
-   InputUTF8: SizeUInt;
-   IBYTE: BYTE;
-   OutputUnicode: SizeUInt;
-   TempBYTE: BYTE;
-   CharLen: SizeUint;
-   LookAhead: SizeUInt;
-   UC: SizeUInt;
- begin
-   if not assigned(Source) then
-     begin
-       result:=0;
-       exit;
-     end;
-   InputUTF8:=0;
-   OutputUnicode:=0;
-   if Assigned(Dest) then
-     begin
-       while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
-         begin
-           IBYTE:=byte(Source[InputUTF8]);
-           if (IBYTE and $80) = 0 then
-             begin
-               //One character US-ASCII, convert it to unicode.
-               //(A former LF -> CRLF expansion was permanently disabled and
-               //has been removed.)
-               Dest[OutputUnicode]:=WideChar(IBYTE);
-               inc(OutputUnicode);
-               inc(InputUTF8);
-             end
-           else
-             begin
-               //The number of leading 1 bits is the claimed sequence length
-               TempByte:=IBYTE;
-               CharLen:=0;
-               while (TempBYTE and $80)<>0 do
-                 begin
-                   TempBYTE:=(TempBYTE shl 1) and $FE;
-                   inc(CharLen);
-                 end;
-               //The sequence occupies InputUTF8..InputUTF8+CharLen-1, all of
-               //which must lie below SourceBytes. The previous test let the
-               //last index be equal to SourceBytes, so the look-ahead below
-               //read one byte behind the input.
-               if InputUTF8+CharLen>SourceBytes then
-                 begin
-                   //Insufficient bytes left to decode the sequence.
-                   //Fallback to single char.
-                   CharLen:= 1;
-                 end;
-               //All following bytes must match the 10xxxxxx pattern
-               for LookAhead := 1 to CharLen-1 do
-                 begin
-                   if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
-                      ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
-                     begin
-                       //Invalid UTF-8 sequence, fallback.
-                       CharLen:= LookAhead;
-                       break;
-                     end;
-                 end;
-               UC:=$FFFF;
-               case CharLen of
-                 1:  begin
-                       //Not valid UTF-8 sequence
-                       UC:=UNICODE_INVALID;
-                     end;
-                 2:  begin
-                       //Two bytes UTF, convert it
-                       UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
-                       UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
-                       if UC <= $7F then
-                         begin
-                           //Invalid UTF sequence.
-                           UC:=UNICODE_INVALID;
-                         end;
-                     end;
-                 3:  begin
-                       //Three bytes, convert it to unicode
-                       UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
-                       UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
-                       UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
-                       if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
-                         begin
-                           //Invalid UTF-8 sequence
-                           UC:= UNICODE_INVALID;
-                         end;
-                     end;
-                 4:  begin
-                       //Four bytes, convert it to two unicode characters
-                       UC:= (byte(Source[InputUTF8]) and $07) shl 18;
-                       UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
-                       UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
-                       UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
-                       if (UC < $10000) or (UC > $10FFFF) then
-                         begin
-                           UC:= UNICODE_INVALID;
-                         end
-                       else
-                         begin
-                           { only store pair if room }
-                           dec(UC,$10000);
-                           if (OutputUnicode<MaxDestChars-1) then
-                             begin
-                               Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
-                               inc(OutputUnicode);
-                               UC:=(UC and $3ff) + $DC00;
-                             end
-                           else
-                             begin
-                               InputUTF8:= InputUTF8 + CharLen;
-                               { don't store anything }
-                               CharLen:=0;
-                             end;
-                         end;
-                     end;
-                 5,6,7:  begin
-                           //Invalid UTF8 to unicode conversion,
-                           //mask it as invalid UNICODE too.
-                           UC:=UNICODE_INVALID;
-                         end;
-               end;
-               if CharLen > 0 then
-                 begin
-                   Dest[OutputUnicode]:=WideChar(UC);
-                   inc(OutputUnicode);
-                 end;
-               InputUTF8:= InputUTF8 + CharLen;
-             end;
-         end;
-     end
-   else
-     begin
-       //Size computation only: same decoding rules, nothing is stored
-       while (InputUTF8<SourceBytes) do
-         begin
-           IBYTE:=byte(Source[InputUTF8]);
-           if (IBYTE and $80) = 0 then
-             begin
-               //One character US-ASCII
-               inc(OutputUnicode);
-               inc(InputUTF8);
-             end
-           else
-             begin
-               TempByte:=IBYTE;
-               CharLen:=0;
-               while (TempBYTE and $80)<>0 do
-                 begin
-                   TempBYTE:=(TempBYTE shl 1) and $FE;
-                   inc(CharLen);
-                 end;
-               //Same bounds test as in the storing branch
-               if InputUTF8+CharLen>SourceBytes then
-                 begin
-                   //Insufficient bytes left to decode the sequence.
-                   //Fallback to single char.
-                   CharLen:= 1;
-                 end;
-               for LookAhead := 1 to CharLen-1 do
-                 begin
-                   if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
-                      ((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
-                     begin
-                       //Invalid UTF-8 sequence, fallback.
-                       CharLen:= LookAhead;
-                       break;
-                     end;
-                 end;
-               UC:=$FFFF;
-               case CharLen of
-                 1:  begin
-                       //Not valid UTF-8 sequence
-                       UC:=UNICODE_INVALID;
-                     end;
-                 2:  begin
-                       //Two bytes UTF, convert it
-                       UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
-                       UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
-                       if UC <= $7F then
-                         begin
-                           //Invalid UTF sequence.
-                           UC:=UNICODE_INVALID;
-                         end;
-                     end;
-                 3:  begin
-                       //Three bytes, convert it to unicode
-                       UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
-                       UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
-                       UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
-                       if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
-                         begin
-                           //Invalid UTF-8 sequence
-                           UC:= UNICODE_INVALID;
-                         end;
-                     end;
-                 4:  begin
-                       //Four bytes, convert it to two unicode characters
-                       UC:= (byte(Source[InputUTF8]) and $07) shl 18;
-                       UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
-                       UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
-                       UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
-                       if (UC < $10000) or (UC > $10FFFF) then
-                         UC:= UNICODE_INVALID
-                       else
-                         { a surrogate pair needs one extra code unit }
-                         inc(OutputUnicode);
-                     end;
-                 5,6,7:  begin
-                           //Invalid UTF8 to unicode conversion,
-                           //mask it as invalid UNICODE too.
-                           UC:=UNICODE_INVALID;
-                         end;
-               end;
-               if CharLen > 0 then
-                 inc(OutputUnicode);
-               InputUTF8:= InputUTF8 + CharLen;
-             end;
-         end;
-     end;
-   Result:=OutputUnicode+1;
- end;
- { Encodes an AnsiString as UTF-8: s is typecast to UnicodeString and handed
-   to the UnicodeString overload of UTF8Encode, whose result is returned. }
- function UTF8Encode(const s : Ansistring) : UTF8String; inline;
- begin
- Result:=UTF8Encode(UnicodeString(s));
- end;
- { Converts the UTF-16 string s to UTF-8.
-   Returns an empty string when s is empty or when UnicodeToUtf8 reports a
-   value that is not positive. }
- function UTF8Encode(const s : UnicodeString) : UTF8String;
-   var
-     srclen,
-     written : SizeInt;
-     buf     : UTF8String;
-   begin
-     Result:='';
-     srclen:=Length(s);
-     if srclen=0 then
-       exit;
-     { reserve three bytes for every UTF-16 code unit of the input }
-     SetLength(buf,srclen*3);
-     { the size passed on is one larger than the buffer length }
-     written:=UnicodeToUtf8(PChar(buf),Length(buf)+1,PUnicodeChar(s),srclen);
-     if written<=0 then
-       exit;
-     { the reported count is one more than the number of bytes to keep }
-     SetLength(buf,written-1);
-     Result:=buf;
-   end;
- { Converts the UTF-8 string s to a UTF-16 UnicodeString.
-   Returns an empty string when s is empty or when Utf8ToUnicode reports a
-   value that is not positive. }
- function UTF8Decode(const s : UTF8String): UnicodeString;
-   var
-     srclen,
-     written : SizeInt;
-     buf     : UnicodeString;
-   begin
-     Result:='';
-     srclen:=Length(s);
-     if srclen=0 then
-       exit;
-     { reserve one UTF-16 code unit for every input byte }
-     SetLength(buf,srclen);
-     { the size passed on is one larger than the buffer length }
-     written:=Utf8ToUnicode(PUnicodeChar(buf),Length(buf)+1,PChar(s),srclen);
-     if written<=0 then
-       exit;
-     { the reported count is one more than the number of code units to keep }
-     SetLength(buf,written-1);
-     Result:=buf;
-   end;
- { Returns s encoded as UTF-8; simply forwards to Utf8Encode.
-   Declared inline when SYSTEMINLINE is defined. }
- function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
- begin
- Result:=Utf8Encode(s);
- end;
- { Decodes the UTF-8 string s with Utf8Decode and returns it as an ansistring;
-   the UnicodeString produced by Utf8Decode is converted implicitly by the
-   assignment to Result. Declared inline when SYSTEMINLINE is defined. }
- function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
- begin
- Result:=Utf8Decode(s);
- end;
- { Converts the UTF-16 string s into a zero-terminated UCS4String.
-   Each call to utf16toutf32 yields one UTF-32 element and reports how many
-   UTF-16 code units of s it consumed. }
- function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
-   var
-     srcpos,
-     srclen,
-     count    : SizeInt;
-     consumed : longint;
-   begin
-     srclen:=length(s);
-     { upper bound: one element per code unit plus the terminator }
-     setlength(result,srclen+1);
-     count:=0;
-     srcpos:=1;
-     while srcpos<=srclen do
-       begin
-         result[count]:=utf16toutf32(s,srcpos,consumed);
-         inc(srcpos,consumed);
-         inc(count);
-       end;
-     { count <= srclen, because surrogate pairs may have been merged.      }
-     { Keep one extra element as terminating #0 (dynamic arrays are        }
-     { implicitly filled with zero).                                       }
-     setlength(result,count+1);
-   end;
- { Concatenates an utf-32 char to a unicodestring. S *must* be unique when entering.
-   nc    : code point to append
-   S     : destination; grown when the position(s) to write lie beyond its length
-   index : 1-based write position in S; advanced by the number of UTF-16 code
-           units written (1, or 2 for a surrogate pair)
-   Values above $10ffff are invalid and are stored as a single '?'. }
- procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
-   var
-     p : PUnicodeChar;
-   begin
-     { if nc > $ffff, we need two places }
-     if (index+ord(nc > $ffff)>length(s)) then
-       if (length(s) < 10*256) then
-         setlength(s,length(s)+10)
-       else
-         setlength(s,length(s)+length(s) shr 8);
-     { we know that s is unique -> avoid uniquestring calls}
-     p:=@s[index];
-     { $ffff itself still fits into one UTF-16 code unit, so it has to take this
-       branch: in the surrogate branch (nc - $10000) would underflow and two
-       code units would be written although only one place was reserved above }
-     if (nc<=$ffff) then
-       begin
-         p^:=unicodechar(nc);
-         inc(index);
-       end
-     else if (dword(nc)<=$10ffff) then
-       begin
-         { split into high ($d800..) and low ($dc00..) surrogate }
-         p^:=unicodechar((nc - $10000) shr 10 + $d800);
-         (p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
-         inc(index,2);
-       end
-     else
-       { invalid code point }
-       begin
-         p^:='?';
-         inc(index);
-       end;
-   end;
- { Converts a zero-terminated UCS4String into a UTF-16 UnicodeString by
-   appending every element except the last one with ConcatUTF32ToUnicodeStr. }
- function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
-   var
-     srcidx,
-     lastidx,
-     outpos  : SizeInt;
-   begin
-     outpos:=1;
-     { the last element is the terminating #0 and is not converted }
-     SetLength(result,length(s)-1);
-     lastidx:=high(s)-1;
-     srcidx:=0;
-     while srcidx<=lastidx do
-       begin
-         ConcatUTF32ToUnicodeStr(s[srcidx],result,outpos);
-         inc(srcidx);
-       end;
-     { trim to what was actually written: the string may have been grown }
-     { in steps to make room for surrogate pairs                         }
-     setlength(result,outpos-1);
-   end;
- { Converts the UTF-16 WideString s into a zero-terminated UCS4String.
-   Each call to utf16toutf32 yields one UTF-32 element and reports how many
-   UTF-16 code units of s it consumed. }
- function WideStringToUCS4String(const s : WideString) : UCS4String;
-   var
-     srcpos,
-     srclen,
-     count    : SizeInt;
-     consumed : longint;
-   begin
-     srclen:=length(s);
-     { upper bound: one element per code unit plus the terminator }
-     setlength(result,srclen+1);
-     count:=0;
-     srcpos:=1;
-     while srcpos<=srclen do
-       begin
-         result[count]:=utf16toutf32(s,srcpos,consumed);
-         inc(srcpos,consumed);
-         inc(count);
-       end;
-     { count <= srclen, because surrogate pairs may have been merged.      }
-     { Keep one extra element as terminating #0 (dynamic arrays are        }
-     { implicitly filled with zero).                                       }
-     setlength(result,count+1);
-   end;
- { Concatenates an utf-32 char to a widestring. S *must* be unique when entering.
-   nc    : code point to append
-   S     : destination; grown when the position(s) to write lie beyond its length
-   index : 1-based write position in S; advanced by the number of UTF-16 code
-           units written (1, or 2 for a surrogate pair)
-   Values above $10ffff are invalid and are stored as a single '?'. }
- procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
-   var
-     p : PWideChar;
-   begin
-     { if nc > $ffff, we need two places }
-     if (index+ord(nc > $ffff)>length(s)) then
-       if (length(s) < 10*256) then
-         setlength(s,length(s)+10)
-       else
-         setlength(s,length(s)+length(s) shr 8);
-     { we know that s is unique -> avoid uniquestring calls}
-     p:=@s[index];
-     { $ffff itself still fits into one UTF-16 code unit, so it has to take this
-       branch: in the surrogate branch (nc - $10000) would underflow and two
-       code units would be written although only one place was reserved above }
-     if (nc<=$ffff) then
-       begin
-         p^:=widechar(nc);
-         inc(index);
-       end
-     else if (dword(nc)<=$10ffff) then
-       begin
-         { split into high ($d800..) and low ($dc00..) surrogate }
-         p^:=widechar((nc - $10000) shr 10 + $d800);
-         (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
-         inc(index,2);
-       end
-     else
-       { invalid code point }
-       begin
-         p^:='?';
-         inc(index);
-       end;
-   end;
- { Converts a zero-terminated UCS4String into a UTF-16 WideString by
-   appending every element except the last one with ConcatUTF32ToWideStr. }
- function UCS4StringToWideString(const s : UCS4String) : WideString;
-   var
-     srcidx,
-     lastidx,
-     outpos  : SizeInt;
-   begin
-     outpos:=1;
-     { the last element is the terminating #0 and is not converted }
-     SetLength(result,length(s)-1);
-     lastidx:=high(s)-1;
-     srcidx:=0;
-     while srcidx<=lastidx do
-       begin
-         ConcatUTF32ToWideStr(s[srcidx],result,outpos);
-         inc(srcidx);
-       end;
-     { trim to what was actually written: the string may have been grown }
-     { in steps to make room for surrogate pairs                         }
-     setlength(result,outpos-1);
-   end;
- { Messages written to StdErr by unimplementedunicodestring. }
- const
- SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
- SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
- { Common body of the placeholder handlers that follow.
-   When console I/O is compiled in (FPC_HAS_FEATURE_CONSOLEIO) and the program
-   is a console program, both messages above are written to StdErr.
-   In every case HandleErrorFrame is then called with code 233 and the
-   current frame. }
- procedure unimplementedunicodestring;
- begin
- {$ifdef FPC_HAS_FEATURE_CONSOLEIO}
- If IsConsole then
- begin
- Writeln(StdErr,SNoUnicodestrings);
- Writeln(StdErr,SRecompileWithUnicodestrings);
- end;
- {$endif FPC_HAS_FEATURE_CONSOLEIO}
- HandleErrorFrame(233,get_frame);
- end;
- // Placeholder handlers: each one only calls unimplementedunicodestring and
- // never assigns a function result itself. Warnings are switched off around
- // them (presumably to silence "function result not set" - TODO confirm).
- {$warnings off}
- // Placeholder used by initunicodestringmanager for both the upper-case and
- // the lower-case conversion hooks.
- function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
- begin
- unimplementedunicodestring;
- end;
- // Placeholder for the string comparison hook.
- function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
- begin
- unimplementedunicodestring;
- end;
- // Placeholder for the text comparison hook.
- function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
- begin
- unimplementedunicodestring;
- end;
- // Placeholder for the CharLengthPChar hook.
- function CharLengthPChar(const Str: PChar): PtrInt;
- begin
- unimplementedunicodestring;
- end;
- {$warnings on}
- { Fills the hooks of the global widestringmanager record with the default
-   routines and placeholder handlers of this file.
-   - Move and case hooks are only set when HAS_WIDESTRINGMANAGER is not defined.
-   - The Unicode compare hooks are always set.
-   - The Wide* hooks and CharLengthPCharProc are only set when
-     FPC_WIDESTRING_EQUAL_UNICODESTRING is defined. }
- procedure initunicodestringmanager;
- begin
- {$ifndef HAS_WIDESTRINGMANAGER}
- widestringmanager.Unicode2AnsiMoveProc:=@defaultUnicode2AnsiMove;
- widestringmanager.Ansi2UnicodeMoveProc:=@defaultAnsi2UnicodeMove;
- // upper and lower case share the same placeholder
- widestringmanager.UpperUnicodeStringProc:=@GenericUnicodeCase;
- widestringmanager.LowerUnicodeStringProc:=@GenericUnicodeCase;
- {$endif HAS_WIDESTRINGMANAGER}
- widestringmanager.CompareUnicodeStringProc:=@CompareUnicodeString;
- widestringmanager.CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
- {$ifdef FPC_WIDESTRING_EQUAL_UNICODESTRING}
- // the Wide* hooks reuse the Unicode* routines
- {$ifndef HAS_WIDESTRINGMANAGER}
- widestringmanager.Wide2AnsiMoveProc:=@defaultUnicode2AnsiMove;
- widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2UnicodeMove;
- widestringmanager.UpperWideStringProc:=@GenericUnicodeCase;
- widestringmanager.LowerWideStringProc:=@GenericUnicodeCase;
- {$endif HAS_WIDESTRINGMANAGER}
- widestringmanager.CompareWideStringProc:=@CompareUnicodeString;
- widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString;
- // NOTE(review): CharLengthPCharProc is only assigned inside this conditional - confirm that is intended
- widestringmanager.CharLengthPCharProc:=@CharLengthPChar;
- {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
- end;
|