123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112 |
- unit GR32_LowLevel;
- (* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1 or LGPL 2.1 with linking exception
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Alternatively, the contents of this file may be used under the terms of the
- * Free Pascal modified version of the GNU Lesser General Public License
- * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
- * of this license are applicable instead of those above.
- * Please see the file LICENSE.txt for additional information concerning this
- * license.
- *
- * The Original Code is Graphics32
- *
- * The Initial Developer of the Original Code is
- * Alex A. Denisov
- *
- * Portions created by the Initial Developer are Copyright (C) 2000-2009
- * the Initial Developer. All Rights Reserved.
- *
- * ***** END LICENSE BLOCK ***** *)
- interface
- {$include GR32.inc}
- {$IFDEF PUREPASCAL}
- {$DEFINE USENATIVECODE}
- {$DEFINE USEMOVE}
- {$ENDIF}
- {$IFDEF USEINLINING}
- {$DEFINE USENATIVECODE}
- {$ENDIF}
- // Define WRAP_USEFLOATMOD to have Wrap(Single, Single) forward to FloatMod().
- // If WRAP_USEFLOATMOD is not defined then an iterative algorithm is used which is
- // very ineffective when the value is much out of bounds.
- {$define WRAP_USEFLOATMOD}
- uses
- {$if defined(FRAMEWORK_FMX)}
- FMX.Graphics,
- {$elseif defined(FRAMEWORK_VCL)}
- VCL.Graphics,
- {$else}
- Graphics,
- {$ifend}
- System.UITypes,
- GR32,
- GR32_Math,
- GR32_Bindings;
- //------------------------------------------------------------------------------
- //
- // FillLongword: An analogue of FillChar for 32 bit values
- //
- //------------------------------------------------------------------------------
- var FillLongword: procedure(var X; Count: Cardinal; Value: Longword);
- procedure FillWord(var X; Count: Cardinal; Value: Longword);
- //------------------------------------------------------------------------------
- //
- // MoveLongword: An analogue of Move optimized for 32 bit values
- // MoveWord: An analogue of Move optimized for 16 bit values
- //
- //------------------------------------------------------------------------------
- {$IFDEF USEMOVE}
- procedure MoveLongword(const Source; var Dest; Count: Integer); {$IFDEF USEINLINING} inline; {$ENDIF}
- {$ELSE}
- procedure MoveLongword(const Source; var Dest; Count: Integer);
- {$ENDIF}
- procedure MoveWord(const Source; var Dest; Count: Integer);
- //------------------------------------------------------------------------------
- //
- // StackAlloc: Allocates a 'small' block of memory on the stack
- //
- //------------------------------------------------------------------------------
- {$IFDEF USESTACKALLOC}
- function StackAlloc(Size: Integer): Pointer; register;
- // Pops memory allocated by StackAlloc
- procedure StackFree(P: Pointer); register;
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // Swap: Exchange values
- //
- //------------------------------------------------------------------------------
- // Exchange two 32-bit values (except Swap(pointer, pointer))
- procedure Swap(var A, B: Pointer); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- procedure Swap(var A, B: Integer); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- procedure Swap(var A, B: TFixed); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- procedure Swap(var A, B: TColor32); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- procedure Swap32(var A, B); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Convert little-endian <-> big-endian
- function Swap16(Value: Word): Word; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- function Swap32(Value: Cardinal): Cardinal; overload; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function Swap64(Value: Int64): Int64; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Exchange A <-> B only if B < A
- procedure TestSwap(var A, B: Integer); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- procedure TestSwap(var A, B: TFixed); overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Exchange A <-> B only if B < A then restrict both to [0..Size-1] range.
- // Returns true if resulting range has common points with [0..Size-1] range.
- function TestClip(var A, B: Integer; const Size: Integer): Boolean; overload;
- function TestClip(var A, B: Integer; const Start, Stop: Integer): Boolean; overload;
- //------------------------------------------------------------------------------
- //
- // Min/Max: Returns min./max. value of A, B and C
- //
- //------------------------------------------------------------------------------
- function Min(const A, B, C: Integer): Integer; overload; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- function Max(const A, B, C: Integer): Integer; overload; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // Constrain, Clamp: Constrain value to range
- //
- //------------------------------------------------------------------------------
- // Return value constrained to [Lo..Hi] range
- function Constrain(const Value, Lo, Hi: Integer): Integer; overload; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- function Constrain(const Value, Lo, Hi: Single): Single; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Returns value constrained to [min(Constrain1, Constrain2)..max(Constrain1, Constrain2] range
- function SwapConstrain(const Value: Integer; Constrain1, Constrain2: Integer): Integer;
- // Clamp integer value to [0..255] range
- function Clamp(const Value: Integer): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Clamp integer value to [0..Max] range
- function Clamp(Value, Max: Integer): Integer; overload; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- // Clamp integer value to [Min..Max] range. Same as Constrain with same parameters.
- function Clamp(Value, Min, Max: Integer): Integer; overload; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // Wrap: Constrain value to range with wrap around
- //
- //------------------------------------------------------------------------------
- // Wrap integer value to [0..Max] range
- function Wrap(Value, Max: Integer): Integer; overload; {$IFDEF USENATIVECODE} inline; {$ENDIF}
- // Same but [Min..Max] range. Min is assumed to be <= Max
- function Wrap(Value, Min, Max: Integer): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Wrap single value to [0..Max) range.
- // Basically the same as FloatMod except:
- // - The upper limit is expected to always be positive.
- // - If Max=0, then 0 is returned.
- // Unlike the integer version of Wrap, the upper limit is exclusive.
- // NAN is not checked.
- function Wrap(Value, Max: Single): Single; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Same as Wrap above but Value is by ref and Max is an integer
- procedure WrapMem(var Value: Single; Max: Cardinal); {$IFDEF USEINLINING} inline; {$ENDIF}
- // Fast Wrap alternatives for cases where range+1 is a power of two (e.g. 3, 7, 15, etc)
- function WrapPow2(Value, Max: Integer): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function WrapPow2(Value, Min, Max: Integer): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // Mirror, Reflect: Constrain value to range with mirroring
- //
- //------------------------------------------------------------------------------
- // Mirror:
- // - Has symmetry around the center of the edge value/pixel.
- // - Cycle is (2*Max).
- // - Example (Max=3): 012321012321 (2 cycles)
- // - Example (Max=4): 0123432101234321 (2 cycles)
- //
- // Reflect:
- // - Has symmetry around the end of the edge value/pixel.
- // - Cycle is (2*(Max+1)).
- // - Example (Max=3): 0123321001233210 (2 cycles)
- // - Example (Max=4): 01234432100123443210 (2 cycles)
- //------------------------------------------------------------------------------
- // Mirror integer value in [0..Max] range
- function Mirror(Value, Max: Integer): Integer; overload;
- // Mirror integer value in [Min..Max] range
- function Mirror(Value, Min, Max: Integer): Integer; overload;
- // Reflect integer value in [0..Max] range
- function Reflect(Value, Max: Integer): Integer; overload;
- // Reflect integer value in [Min..Max] range
- function Reflect(Value, Min, Max: Integer): Integer; overload;
- // Fast Reflect alternatives for cases where range+1 is a power of two (e.g. 3, 7, 15, etc)
- function ReflectPow2(Value, Max: Integer): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function ReflectPow2(Value, Min, Max: Integer): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // Clamp/Wrap/Mirror/Reflect
- //
- //------------------------------------------------------------------------------
- // Functions to determine appropiate wrap procs (normal or power of 2 optimized)
- function GetOptimalWrap(Max: Integer): TWrapProc; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function GetOptimalWrap(Min, Max: Integer): TWrapProcEx; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function GetOptimalReflect(Max: Integer): TWrapProc; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function GetOptimalReflect(Min, Max: Integer): TWrapProcEx; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Functions to retrieve correct WrapProc given WrapMode (and range) }
- function GetWrapProc(WrapMode: TWrapMode): TWrapProc; overload;
- function GetWrapProc(WrapMode: TWrapMode; Max: Integer): TWrapProc; overload;
- function GetWrapProcEx(WrapMode: TWrapMode): TWrapProcEx; overload;
- function GetWrapProcEx(WrapMode: TWrapMode; Min, Max: Integer): TWrapProcEx; overload;
- const
- WRAP_PROCS: array[TWrapMode] of TWrapProc = (Clamp, Wrap, Mirror{$ifdef GR32_WRAPMODE_REFLECT}, Reflect{$endif});
- WRAP_PROCS_EX: array[TWrapMode] of TWrapProcEx = (Clamp, Wrap, Mirror{$ifdef GR32_WRAPMODE_REFLECT}, Reflect{$endif});
- //------------------------------------------------------------------------------
- //
- // Div255: Fast integer division by 255 with limited range
- //
- //------------------------------------------------------------------------------
- // Fast integer division by 255.
- // Valid for the range [0..$ffff]
- function Div255(Value: Word): Word; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Possibly even faster integer division by 255.
- // Valid for the range [0..255*255] }
- function FastDiv255(Value: Word): Word; {$IFDEF USEINLINING} inline; {$ENDIF}
- // Fast rounded integer division by 255.
- // Valid for the range [0..255*255]
- function Div255Round(Value: Word): Word; {$IFDEF USEINLINING} inline; {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // FastRound, FastTrunc, FastFloor, and FastCeil
- // Fast alternatives to the RTL Round, Trunc, Floor and Ceil
- //
- //------------------------------------------------------------------------------
- function FastFloor(Value: TFloat): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function FastFloor(Value: Double): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function FastCeil(Value: TFloat): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- function FastCeil(Value: Double): Integer; overload; {$IFDEF USEINLINING} inline; {$ENDIF}
- type
- TFastRoundSingleProc = function(Value: TFloat): Integer;
- TFastRoundDoubleProc = function(Value: Double): Integer;
- var
- // Trunc, Round, Floor, Ceil bindings
- FastTrunc: TFastRoundSingleProc;
- FastRound: TFastRoundSingleProc;
- FastFloorSingle: TFastRoundSingleProc;
- FastFloorDouble: TFastRoundDoubleProc;
- FastCeilSingle: TFastRoundSingleProc;
- FastCeilDouble: TFastRoundDoubleProc;
- //------------------------------------------------------------------------------
- //
- // SAR: Shift right with sign conservation
- //
- //------------------------------------------------------------------------------
- // Note that for PUREPASCAL SAR_n(x) is implemented as (x div 2^n).
- // This works for positive values but not for negative values as both Delphi and FPC
- // compiles (x div 2^n) to:
- //
- // ADD EAX, $00007FFF
- // TEST EAX, EAX
- // JNS :positive
- // ADD EAX, $0000FFFF
- // :positive
- // SAR EAX, n
- //
- function SAR_3(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_4(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_6(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_8(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_9(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_11(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_12(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_13(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_14(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_15(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- function SAR_16(Value: Integer): Integer; {$IFDEF PUREPASCAL} inline; {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // ColorSwap exchanges ARGB <-> ABGR and fills A with $FF
- //
- //------------------------------------------------------------------------------
- function ColorSwap(WinColor: TColor): TColor32;
- //------------------------------------------------------------------------------
- //
- // Bindings
- //
- //------------------------------------------------------------------------------
- var
- LowLevelRegistry: TFunctionRegistry;
- const
- FID_FILLLONGWORD = 0;
- FID_FAST_TRUNC = 1;
- FID_FAST_ROUND = 2;
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- implementation
- uses
- {$if not defined(FPC)}
- System.Math,
- {$else}
- SysUtils,
- Math,
- {$ifend}
- GR32.Types.SIMD;
- {$R-}{$Q-} // switch off overflow and range checking
- //------------------------------------------------------------------------------
- //
- // FillLongword
- //
- //------------------------------------------------------------------------------
- procedure FillLongword_Pas(var X; Count: Cardinal; Value: Longword);
- var
- I: Integer;
- P: PIntegerArray;
- begin
- P := PIntegerArray(@X);
- for I := Count - 1 downto 0 do
- P[I] := Integer(Value);
- end;
- {$IFNDEF PUREPASCAL}
- procedure FillLongword_ASM(var X; Count: Cardinal; Value: Longword); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- // EAX = X; EDX = Count; ECX = Value
- PUSH EDI
- MOV EDI,EAX // Point EDI to destination
- MOV EAX,ECX
- MOV ECX,EDX
- REP STOSD // Fill count dwords
- @Exit:
- POP EDI
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX = X; EDX = Count; R8 = Value
- PUSH RDI
- MOV RDI,RCX // Point EDI to destination
- MOV RAX,R8 // copy value from R8 to RAX (EAX)
- MOV ECX,EDX // copy count to ECX
- TEST ECX,ECX
- JS @Exit
- REP STOSD // Fill count dwords
- @Exit:
- POP RDI
- {$ENDIF}
- end;
- {$IFNDEF OMIT_SSE2}
- procedure FillLongword_SSE2(var X; Count: Integer; Value: Longword); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- // EAX = X; EDX = Count; ECX = Value
- TEST EDX, EDX // if Count = 0 then
- JZ @Exit // Exit
- PUSH EDI // push EDI on stack
- MOV EDI, EAX // Point EDI to destination
- CMP EDX, 32
- JL @SmallLoop
- AND EAX, 3 // get aligned count
- TEST EAX, EAX // check if X is not dividable by 4
- JNZ @SmallLoop // otherwise perform slow small loop
- MOV EAX, EDI
- SHR EAX, 2 // bytes to count
- AND EAX, 3 // get aligned count
- ADD EAX,-4
- NEG EAX // get count to advance
- JZ @SetupMain
- SUB EDX, EAX // subtract aligning start from total count
- @AligningLoop:
- MOV [EDI], ECX
- ADD EDI, 4
- DEC EAX
- JNZ @AligningLoop
- @SetupMain:
- MOV EAX, EDX // EAX = remaining count
- SHR EAX, 2
- SHL EAX, 2
- SUB EDX, EAX // EDX = remaining count
- SHR EAX, 2
- MOVD XMM0, ECX
- PUNPCKLDQ XMM0, XMM0
- PUNPCKLDQ XMM0, XMM0
- @SSE2Loop:
- MOVDQA [EDI], XMM0
- ADD EDI, 16
- DEC EAX
- JNZ @SSE2Loop
- @SmallLoop:
- MOV EAX,ECX
- MOV ECX,EDX
- REP STOSD // Fill count dwords
- @ExitPOP:
- POP EDI
- @Exit:
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // RCX = X; RDX = Count; R8 = Value
- TEST RDX, RDX // if Count = 0 then
- JZ @Exit // Exit
- MOV R9, RCX // Point R9 to destination
- CMP RDX, 32
- JL @SmallLoop
- AND RCX, 3 // get aligned count
- TEST RCX, RCX // check if X is not dividable by 4
- JNZ @SmallLoop // otherwise perform slow small loop
- MOV RCX, R9
- SHR RCX, 2 // bytes to count
- AND RCX, 3 // get aligned count
- ADD RCX,-4
- NEG RCX // get count to advance
- JZ @SetupMain
- SUB RDX, RCX // subtract aligning start from total count
- @AligningLoop:
- MOV [R9], R8D
- ADD R9, 4
- DEC RCX
- JNZ @AligningLoop
- @SetupMain:
- MOV RCX, RDX // RCX = remaining count
- SHR RCX, 2
- SHL RCX, 2
- SUB RDX, RCX // RDX = remaining count
- SHR RCX, 2
- MOVD XMM0, R8D
- PUNPCKLDQ XMM0, XMM0
- PUNPCKLDQ XMM0, XMM0
- @SSE2Loop:
- MOVDQA [R9], XMM0
- ADD R9, 16
- DEC RCX
- JNZ @SSE2Loop
- TEST RDX, RDX
- JZ @Exit
- @SmallLoop:
- MOV [R9], R8D
- ADD R9, 4
- DEC RDX
- JNZ @SmallLoop
- @Exit:
- {$ENDIF}
- end;
- {$ENDIF}
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // FillWord
- //
- //------------------------------------------------------------------------------
- procedure FillWord(var X; Count: Cardinal; Value: LongWord);
- {$IFDEF USENATIVECODE}
- var
- I: Integer;
- P: PWordArray;
- begin
- P := PWordArray(@X);
- for I := Count - 1 downto 0 do
- P[I] := Value;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- // EAX = X; EDX = Count; ECX = Value
- PUSH EDI
- MOV EDI,EAX // Point EDI to destination
- MOV EAX,ECX
- MOV ECX,EDX
- TEST ECX,ECX
- JZ @exit
- REP STOSW // Fill count words
- @exit:
- POP EDI
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX = X; EDX = Count; R8D = Value
- PUSH RDI
- MOV RDI,RCX // Point EDI to destination
- MOV EAX,R8D
- MOV ECX,EDX
- TEST ECX,ECX
- JZ @exit
- REP STOSW // Fill count words
- @exit:
- POP RDI
- {$ENDIF}
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // MoveLongword
- //
- //------------------------------------------------------------------------------
- procedure MoveLongword(const Source; var Dest; Count: Integer);
- {$IFDEF USEMOVE}
- begin
- Move(Source, Dest, Count shl 2);
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- // EAX = Source; EDX = Dest; ECX = Count
- PUSH ESI
- PUSH EDI
- MOV ESI,EAX
- MOV EDI,EDX
- CMP EDI,ESI
- JE @exit
- REP MOVSD
- @exit:
- POP EDI
- POP ESI
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // RCX = Source; RDX = Dest; R8 = Count
- PUSH RSI
- PUSH RDI
- MOV RSI,RCX
- MOV RDI,RDX
- MOV RCX,R8
- CMP RDI,RSI
- JE @exit
- REP MOVSD
- @exit:
- POP RDI
- POP RSI
- {$ENDIF}
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // MoveWord
- //
- //------------------------------------------------------------------------------
- procedure MoveWord(const Source; var Dest; Count: Integer);
- {$IFDEF USEMOVE}
- begin
- Move(Source, Dest, Count shl 1);
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- // EAX = X; EDX = Count; ECX = Value
- PUSH ESI
- PUSH EDI
- MOV ESI,EAX
- MOV EDI,EDX
- MOV EAX,ECX
- CMP EDI,ESI
- JE @exit
- REP MOVSW
- @exit:
- POP EDI
- POP ESI
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // RCX = Source; RDX = Dest; R8 = Count
- CMP RCX,RDX
- JE @exit
- TEST R8,R8
- JZ @exit
- PUSH RSI
- PUSH RDI
- MOV RSI,RCX
- MOV RDI,RDX
- MOV RCX,R8
- REP MOVSW
- POP RDI
- POP RSI
- @exit:
- {$ENDIF}
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // Swap
- //
- //------------------------------------------------------------------------------
- procedure Swap(var A, B: Pointer);
- var
- T: Pointer;
- begin
- T := A;
- A := B;
- B := T;
- end;
- //------------------------------------------------------------------------------
- procedure Swap(var A, B: Integer);
- var
- T: Integer;
- begin
- T := A;
- A := B;
- B := T;
- end;
- //------------------------------------------------------------------------------
- procedure Swap(var A, B: TFixed);
- var
- T: TFixed;
- begin
- T := A;
- A := B;
- B := T;
- end;
- //------------------------------------------------------------------------------
- procedure Swap(var A, B: TColor32);
- var
- T: TColor32;
- begin
- T := A;
- A := B;
- B := T;
- end;
- //------------------------------------------------------------------------------
- procedure Swap32(var A, B);
- var
- T: Integer;
- begin
- T := Integer(A);
- Integer(A) := Integer(B);
- Integer(B) := T;
- end;
- //------------------------------------------------------------------------------
- function Swap16(Value: Word): Word;
- {$IFDEF USENATIVECODE}
- begin
- Result := System.Swap(Value);
- {$ELSE}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX, ECX
- {$ENDIF}
- XCHG AL, AH
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Swap32(Value: Cardinal): Cardinal;
- {$IFDEF PUREPASCAL}
- type
- TTwoWords = array [0..1] of Word;
- begin
- TTwoWords(Result)[1] := System.Swap(TTwoWords(Value)[0]);
- TTwoWords(Result)[0] := System.Swap(TTwoWords(Value)[1]);
- {$ELSE}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX, ECX
- {$ENDIF}
- BSWAP EAX
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Swap64(Value: Int64): Int64;
- type
- TFourWords = array [0..3] of Word;
- begin
- TFourWords(Result)[3] := System.Swap(TFourWords(Value)[0]);
- TFourWords(Result)[2] := System.Swap(TFourWords(Value)[1]);
- TFourWords(Result)[1] := System.Swap(TFourWords(Value)[2]);
- TFourWords(Result)[0] := System.Swap(TFourWords(Value)[3]);
- end;
- //------------------------------------------------------------------------------
- procedure TestSwap(var A, B: Integer);
- var
- T: Integer;
- begin
- if B < A then
- begin
- T := A;
- A := B;
- B := T;
- end;
- end;
- //------------------------------------------------------------------------------
- procedure TestSwap(var A, B: TFixed);
- var
- T: TFixed;
- begin
- if B < A then
- begin
- T := A;
- A := B;
- B := T;
- end;
- end;
- //------------------------------------------------------------------------------
- function TestClip(var A, B: Integer; const Size: Integer): Boolean;
- begin
- TestSwap(A, B); // now A = min(A,B) and B = max(A, B)
- if A < 0 then
- A := 0;
- if B >= Size then
- B := Size - 1;
- Result := B >= A;
- end;
- //------------------------------------------------------------------------------
- function TestClip(var A, B: Integer; const Start, Stop: Integer): Boolean;
- begin
- TestSwap(A, B); // now A = min(A,B) and B = max(A, B)
- if A < Start then
- A := Start;
- if B >= Stop then
- B := Stop - 1;
- Result := B >= A;
- end;
- //------------------------------------------------------------------------------
- //
- // Min/Max
- //
- //------------------------------------------------------------------------------
- function Max(const A, B, C: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- if A > B then
- Result := A
- else
- Result := B;
- if C > Result then
- Result := C;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV RAX,RCX
- MOV RCX,R8
- {$ENDIF}
- CMP EDX,EAX
- CMOVG EAX,EDX
- CMP ECX,EAX
- CMOVG EAX,ECX
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Min(const A, B, C: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- if A < B then
- Result := A
- else
- Result := B;
- if C < Result then
- Result := C;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV RAX,RCX
- MOV RCX,R8
- {$ENDIF}
- CMP EDX,EAX
- CMOVL EAX,EDX
- CMP ECX,EAX
- CMOVL EAX,ECX
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // Constrain
- //
- //------------------------------------------------------------------------------
- function Constrain(const Value, Lo, Hi: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- Result := Value;
- if Result < Lo then
- Result := Lo
- else
- if Result > Hi then
- Result := Hi;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- MOV ECX,R8D
- {$ENDIF}
- CMP EDX,EAX
- CMOVG EAX,EDX
- CMP ECX,EAX
- CMOVL EAX,ECX
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Constrain(const Value, Lo, Hi: Single): Single; overload;
- begin
- Result := Value;
- if Result < Lo then
- Result := Lo
- else
- if Result > Hi then
- Result := Hi;
- end;
- //------------------------------------------------------------------------------
- function SwapConstrain(const Value: Integer; Constrain1, Constrain2: Integer): Integer;
- begin
- TestSwap(Constrain1, Constrain2);
- Result := Value;
- if Result < Constrain1 then
- Result := Constrain1
- else
- if Result > Constrain2 then
- Result := Constrain2;
- end;
- //------------------------------------------------------------------------------
- //
- // Clamp
- //
- //------------------------------------------------------------------------------
- function Clamp(const Value: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- Result := Value;
- if Result > 255 then
- Result := 255
- else
- if Result < 0 then
- Result := 0;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- // in x64 calling convention parameters are passed in ECX, EDX, R8 & R9
- MOV EAX,ECX
- {$ENDIF}
- TEST EAX,$FFFFFF00
- JNZ @1
- RET
- @1: JS @2
- MOV EAX,$FF
- RET
- @2: XOR EAX,EAX
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Clamp(Value, Max: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- Result := Value;
- if Result > Max then
- Result := Max
- else
- if Result < 0 then
- Result := 0;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- MOV ECX,R8D
- {$ENDIF}
- CMP EAX,EDX
- JG @Above
- TEST EAX,EAX
- JL @Below
- RET
- @Above:
- MOV EAX,EDX
- RET
- @Below:
- MOV EAX,0
- RET
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Clamp(Value, Min, Max: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- Result := Value;
- if Result > Max then
- Result := Max
- else
- if Result < Min then
- Result := Min;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- MOV ECX,R8D
- {$ENDIF}
- CMP EDX,EAX
- CMOVG EAX,EDX
- CMP ECX,EAX
- CMOVL EAX,ECX
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // Wrap
- //
- //------------------------------------------------------------------------------
- function Wrap(Value, Max: Integer): Integer;
- {$IFDEF USENATIVECODE}
- begin
- Inc(Max);
- if (Value < 0) then
- Value := Value + Max * (-Value div Max + 1);
- Result := Value mod Max;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- MOV ECX,R8D
- LEA ECX,[RDX+1]
- {$ELSE}
- LEA ECX,[EDX+1]
- {$ENDIF}
- CDQ
- IDIV ECX
- MOV EAX,EDX
- TEST EAX,EAX
- JNL @Exit
- ADD EAX,ECX
- @Exit:
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function Wrap(Value, Min, Max: Integer): Integer;
- var
- Range: integer;
- begin
- Range := Max - Min + 1;
- if (Value < Min) then
- Value := Value + Range * ((Min - Value) div Range + 1);
- Result := Min + (Value - Min) mod Range;
- end;
- //------------------------------------------------------------------------------
- function Wrap(Value, Max: Single): Single;
- var
- Maxbin: Cardinal absolute Max;
- begin
- {$if defined(WRAP_USEFLOATMOD)}
- if (Maxbin shl 1 <> 0) then // Single=0 test trick
- Result := FloatMod(Value, Max)
- else
- Result := 0;
- {$else}
- if Max = 0 then
- begin
- Result := 0;
- Exit;
- end;
- Result := Value;
- while Result >= Max do
- Result := Result - Max;
- while Result < 0 do
- Result := Result + Max;
- {$ifend}
- end;
- //------------------------------------------------------------------------------
- procedure WrapMem(var Value: Single; Max: Cardinal);
- begin
- {$if defined(WRAP_USEFLOATMOD)}
- if (Max <> 0) then
- Value := FloatMod(Value, Max)
- else
- Value := 0;
- {$else}
- if Max = 0 then
- begin
- Value := 0;
- Exit;
- end;
- while Value >= Max do
- Value := Value - Max;
- while Value < 0 do
- Value := Value + Max;
- {$ifend}
- end;
- //------------------------------------------------------------------------------
- {$IFDEF USENATIVECODE}
- function DivMod(Dividend, Divisor: Integer; out Remainder: Integer): Integer;
- begin
- Remainder := Dividend mod Divisor;
- Result := Dividend div Divisor;
- end;
- {$ELSE}
- function DivMod(Dividend, Divisor: Integer; out Remainder: Integer): Integer; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- PUSH EBX
- MOV EBX,EDX
- CDQ
- IDIV EBX
- MOV [ECX],EDX
- POP EBX
- {$ENDIF}
- {$IFDEF TARGET_x64}
- PUSH RBX
- MOV EAX,ECX
- MOV ECX,R8D
- MOV EBX,EDX
- CDQ
- IDIV EBX
- MOV [RCX],EDX
- POP RBX
- {$ENDIF}
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- function WrapPow2(Value, Max: Integer): Integer; overload;
- begin
- Result := Value and Max;
- end;
- //------------------------------------------------------------------------------
- function WrapPow2(Value, Min, Max: Integer): Integer; overload;
- begin
- Result := (Value - Min) and (Max - Min) + Min;
- end;
- //------------------------------------------------------------------------------
- function GetOptimalWrap(Max: Integer): TWrapProc; overload;
- begin
- if (Max >= 0) and IsPowerOf2(Max + 1) then
- Result := WrapPow2
- else
- Result := Wrap;
- end;
- //------------------------------------------------------------------------------
- function GetOptimalWrap(Min, Max: Integer): TWrapProcEx; overload;
- begin
- if (Min >= 0) and (Max >= Min) and IsPowerOf2(Max - Min + 1) then
- Result := WrapPow2
- else
- Result := Wrap;
- end;
- //------------------------------------------------------------------------------
- //
- // Mirror
- //
- //------------------------------------------------------------------------------
- {$IFDEF PUREPASCAL}
- function Mirror(Value, Max: Integer): Integer;
- begin
- if Value >= 0 then
- Result := Value
- else
- Result := -Value;
- while (Result > Max) do
- Result := Abs(Max + Max - Result);
- end;
- {$ELSE}
- // FWIW, there's little, if any, benefit of using the assembler version; The pascal version is just as fast.
- function Mirror(Value, Max: Integer): Integer; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX, ECX // Value
- {$ENDIF}
- // EAX: Value
- // EDX: Max
- // Max2 := 2*Max
- LEA ECX, [EDX+EDX]
- // Result := Value
- @Loop:
- // Result := Abs(Result)
- TEST EAX, EAX
- JNL @Positive
- NEG EAX
- @Positive:
- // while (Result > Max) do
- CMP EAX, EDX
- JLE @Exit
- // Result := 2*Max - Result
- NEG EAX
- ADD EAX, ECX
- JMP @Loop
- @Exit:
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- function Mirror(Value, Min, Max: Integer): Integer;
- begin
- Result := Min + Mirror(Value - Min, Max - Min);
- end;
- //------------------------------------------------------------------------------
- //
- // Reflect
- //
- //------------------------------------------------------------------------------
- function Reflect(Value, Max: Integer): Integer;
- var
- Quotient: Integer;
- begin
- if (Value < 0) then
- begin
- Value := Value - Max;
- Quotient := DivMod(Value, Max + 1, Result);
- Inc(Result, Max);
- end else
- Quotient := DivMod(Value, Max + 1, Result);
- if (Quotient and 1 <> 0) then
- Result := Max - Result;
- end;
- //------------------------------------------------------------------------------
- function Reflect(Value, Min, Max: Integer): Integer;
- var
- Quotient: Integer;
- begin
- if (Value < Min) then
- begin
- Quotient := DivMod(Value - Max, Max - Min + 1, Result);
- Inc(Result, Max);
- end else
- begin
- Quotient := DivMod(Value - Min, Max - Min + 1, Result);
- Inc(Result, Min);
- end;
- if (Quotient and 1 <> 0) then
- Result := Max + Min - Result;
- end;
- //------------------------------------------------------------------------------
- function ReflectPow2(Value, Max: Integer): Integer; overload;
- begin
- if (Value and (Max + 1) = 0) then
- Result := Value and Max
- else
- Result := Max - (Value and Max);
- end;
- //------------------------------------------------------------------------------
- function ReflectPow2(Value, Min, Max: Integer): Integer; overload;
- begin
- Result := ReflectPow2(Value-Min, Max-Min)+Min;
- end;
- //------------------------------------------------------------------------------
- function GetOptimalReflect(Max: Integer): TWrapProc; overload;
- begin
- if (Max >= 0) and IsPowerOf2(Max + 1) then
- Result := ReflectPow2
- else
- Result := Reflect;
- end;
- //------------------------------------------------------------------------------
- function GetOptimalReflect(Min, Max: Integer): TWrapProcEx; overload;
- begin
- if (Min > 0) and (Max >= Min) and IsPowerOf2(Max - Min + 1) then
- Result := ReflectPow2
- else
- Result := Reflect;
- end;
- //------------------------------------------------------------------------------
- //
- // Clamp/Wrap/Mirror
- //
- //------------------------------------------------------------------------------
- function GetWrapProc(WrapMode: TWrapMode): TWrapProc; overload;
- begin
- case WrapMode of
- wmRepeat:
- Result := Wrap;
- wmMirror:
- Result := Mirror;
- {$ifdef GR32_WRAPMODE_REFLECT}
- wmReflect:
- Result := Reflect;
- {$endif}
- else //wmClamp:
- Result := Clamp;
- end;
- end;
- //------------------------------------------------------------------------------
- function GetWrapProc(WrapMode: TWrapMode; Max: Integer): TWrapProc; overload;
- begin
- case WrapMode of
- wmRepeat:
- Result := GetOptimalWrap(Max);
- wmMirror:
- Result := Mirror;
- {$ifdef GR32_WRAPMODE_REFLECT}
- wmReflect:
- Result := GetOptimalReflect(Max);
- {$endif}
- else //wmClamp:
- Result := Clamp;
- end;
- end;
- //------------------------------------------------------------------------------
- function GetWrapProcEx(WrapMode: TWrapMode): TWrapProcEx; overload;
- begin
- case WrapMode of
- wmRepeat:
- Result := Wrap;
- wmMirror:
- Result := Mirror;
- {$ifdef GR32_WRAPMODE_REFLECT}
- wmReflect:
- Result := Reflect;
- {$endif}
- else //wmClamp:
- Result := Clamp;
- end;
- end;
- //------------------------------------------------------------------------------
- function GetWrapProcEx(WrapMode: TWrapMode; Min, Max: Integer): TWrapProcEx; overload;
- begin
- case WrapMode of
- wmRepeat:
- Result := GetOptimalWrap(Min, Max);
- wmMirror:
- Result := Mirror;
- {$ifdef GR32_WRAPMODE_REFLECT}
- wmReflect:
- Result := GetOptimalReflect(Min, Max);
- {$endif}
- else //wmClamp:
- Result := Clamp;
- end;
- end;
- //------------------------------------------------------------------------------
- //
- // Div255: Fast integer division by 255 with limited range
- //
- //------------------------------------------------------------------------------
- function Div255(Value: Word): Word;
- begin
- {$if (defined(FPC)) or ((CompilerVersion >= 36.0) and (defined(TARGET_x86)))}
- // Delphi 12, 32-bit, already knows how to optimize division by 255.
- // Unfortunately it always optimizes as if the argument is a signed 32-bit.
- Result := Value div 255;
- {$else}
- // Input is 16 bit, intermediate result is 32-bit, result is 8 bit
- // Note: Algorithm doesn't take sign into account!
- Result := (Value * $8081) shr 23;
- {$ifend}
- end;
- //------------------------------------------------------------------------------
- function FastDiv255(Value: Word): Word;
- begin
- // Input is 16 bit, intermediate result is 32-bit (25 used), result is 8 bit
- // Note: Algorithm doesn't take sign into account!
- Result := (Value + ((Value + 257) shr 8)) shr 8;
- end;
- //------------------------------------------------------------------------------
- function Div255Round(Value: Word): Word;
- begin
- // Input is 16 bit, intermediate result is 24-bit, result is 8 bit
- // Note: Algorithm doesn't take sign into account!
- Result := ((Value + 128) * 257) shr 16;
- end;
- //------------------------------------------------------------------------------
- //
- // FastRound
- //
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- // FastRound_Pas
- //------------------------------------------------------------------------------
- function FastRound_Pas(Value: TFloat): Integer;
- begin
- Result := Round(Value);
- end;
- {$IFNDEF PUREPASCAL}
- //------------------------------------------------------------------------------
- // FastRound_SSE41
- //------------------------------------------------------------------------------
- function FastRound_SSE41(Value: TFloat): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSS xmm0, Value
- {$ifend}
- ROUNDSS xmm0, xmm0, SSE_ROUND.TO_NEAREST_INT + SSE_ROUND.NO_EXC
- CVTSS2SI eax, xmm0
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // FastTrunc
- //
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- // FastTrunc_Pas
- //------------------------------------------------------------------------------
- //
- // Just defer to RTL Trunc
- //
- function FastTrunc_Pas(Value: TFloat): Integer;
- begin
- Result := Trunc(Value);
- end;
- {$IFNDEF PUREPASCAL}
- //------------------------------------------------------------------------------
- // FastTrunc_SSE2
- //------------------------------------------------------------------------------
- //
- // Faster that RTL Trunc on x86 and x64
- //
- {$IFNDEF OMIT_SSE2}
- function FastTrunc_SSE2(Value: TFloat): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSS XMM0, Value
- {$ifend}
- CVTTSS2SI EAX, XMM0
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- // SlowTrunc_SSE2
- //------------------------------------------------------------------------------
- //
- // Faster that RTL Trunc on x64 (and sometimes on x86).
- //
- {$IFNDEF OMIT_SSE2}
- function SlowTrunc_SSE2(Value: TFloat): Integer; {$IFDEF FPC} assembler; {$ENDIF}
- var
- SaveMXCSR: Cardinal;
- NewMXCSR: Cardinal;
- asm
- XOR ECX, ECX
- // Save current rounding mode
- STMXCSR SaveMXCSR
- // Load rounding mode
- MOV EAX, SaveMXCSR
- // Do we need to change anything?
- MOV ECX, EAX
- NOT ECX
- AND ECX, MXCSR.TRUNC
- JZ @SkipSetMXCSR // Skip expensive LDMXCSR
- @SetMXCSR:
- // Save current rounding mode in ECX and flag that we need to restore it
- MOV ECX, EAX
- // Set rounding mode to truncation
- AND EAX, MXCSR.MASK
- OR EAX, MXCSR.TRUNC
- // Set new rounding mode
- MOV NewMXCSR, EAX
- LDMXCSR NewMXCSR
- @SkipSetMXCSR:
- {$if defined(TARGET_x86)}
- MOVSS XMM0, Value
- {$ifend}
- // Round/Trunc
- CVTSS2SI EAX, XMM0
- // Restore rounding mode
- // Did we modify it?
- TEST ECX, ECX
- JZ @SkipRestoreMXCSR // Skip expensive LDMXCSR
- // Restore old rounding mode
- LDMXCSR SaveMXCSR
- @SkipRestoreMXCSR:
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- // FastTrunc_SSE41
- //------------------------------------------------------------------------------
- //
- // Faster that RTL Trunc on x86
- //
- {$IFNDEF OMIT_SSE2}
- function FastTrunc_SSE41(Value: TFloat): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSS xmm0, Value
- {$ifend}
- ROUNDSS xmm0, xmm0, SSE_ROUND.TO_ZERO + SSE_ROUND.NO_EXC
- CVTSS2SI eax, xmm0
- end;
- {$ENDIF}
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // FastFloor
- //
- //------------------------------------------------------------------------------
- function FastFloor(Value: TFloat): Integer;
- begin
- Result := FastFloorSingle(Value);
- end;
- function FastFloor(Value: Double): Integer;
- begin
- Result := FastFloorDouble(Value);
- end;
- //------------------------------------------------------------------------------
- // FastFloorSingle_Pas
- //------------------------------------------------------------------------------
- function FastFloorSingle_Pas(Value: TFloat): Integer;
- begin
- Result := Integer(Trunc(Value));
- if Frac(Value) < 0 then
- Dec(Result);
- end;
- //------------------------------------------------------------------------------
- // FastFloorDouble_Pas
- //------------------------------------------------------------------------------
- function FastFloorDouble_Pas(Value: Double): Integer;
- begin
- Result := Integer(Trunc(Value));
- if Frac(Value) < 0 then
- Dec(Result);
- end;
- {$IFNDEF PUREPASCAL}
- //------------------------------------------------------------------------------
- // FastFloorSingle_SSE41
- //------------------------------------------------------------------------------
- {$IFNDEF OMIT_SSE2}
- function FastFloorSingle_SSE41(Value: Single): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSS xmm0, Value
- {$ifend}
- ROUNDSS xmm0, xmm0, SSE_ROUND.TO_NEG_INF + SSE_ROUND.NO_EXC
- CVTSS2SI eax, xmm0
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- // FastFloorDouble_SSE41
- //------------------------------------------------------------------------------
- {$IFNDEF OMIT_SSE2}
- function FastFloorDouble_SSE41(Value: Double): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSD xmm0, Value
- {$ifend}
- ROUNDSD xmm0, xmm0, SSE_ROUND.TO_NEG_INF + SSE_ROUND.NO_EXC
- CVTTSD2SI eax, xmm0
- end;
- {$ENDIF}
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // FastCeil
- //
- //------------------------------------------------------------------------------
- function FastCeil(Value: TFloat): Integer;
- begin
- Result := FastCeilSingle(Value);
- end;
- function FastCeil(Value: Double): Integer;
- begin
- Result := FastCeilDouble(Value);
- end;
- //------------------------------------------------------------------------------
- // FastCeilSingle_Pas
- //------------------------------------------------------------------------------
- function FastCeilSingle_Pas(Value: TFloat): Integer;
- begin
- Result := Integer(Trunc(Value));
- if Frac(Value) > 0 then
- Inc(Result);
- end;
- //------------------------------------------------------------------------------
- // FastCeilDouble_Pas
- //------------------------------------------------------------------------------
- function FastCeilDouble_Pas(Value: Double): Integer;
- begin
- Result := Integer(Trunc(Value));
- if Frac(Value) > 0 then
- Inc(Result);
- end;
- {$IFNDEF PUREPASCAL}
- //------------------------------------------------------------------------------
- // FastCeilSingle_SSE41
- //------------------------------------------------------------------------------
- function FastCeilSingle_SSE41(Value: Single): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSS xmm0, Value
- {$ifend}
- ROUNDSS xmm0, xmm0, SSE_ROUND.TO_POS_INF + SSE_ROUND.NO_EXC
- CVTSS2SI eax, xmm0
- end;
- //------------------------------------------------------------------------------
- // FastCeilDouble_SSE41
- //------------------------------------------------------------------------------
- function FastCeilDouble_SSE41(Value: Double): Integer; {$IFDEF FPC} assembler; {$IFDEF TARGET_X64} nostackframe; {$ENDIF}{$ENDIF}
- asm
- {$if defined(TARGET_x86)}
- MOVSD xmm0, Value
- {$ifend}
- ROUNDSD xmm0, xmm0, SSE_ROUND.TO_POS_INF + SSE_ROUND.NO_EXC
- CVTTSD2SI eax, xmm0
- end;
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // SAR: Shift right with sign conservation
- //
- //------------------------------------------------------------------------------
- function SAR_3(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 8;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,3
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_4(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 16;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,4
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_6(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 64;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,6
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_8(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 256;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,8
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_9(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 512;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,9
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_11(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 2048;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,11
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_12(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 4096;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,12
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_13(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 8192;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,13
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_14(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 16384;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,14
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_15(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 32768;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,15
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- function SAR_16(Value: Integer): Integer;
- {$IFDEF PUREPASCAL}
- begin
- Result := Value div 65536;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- SAR EAX,16
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // ColorSwap
- //
- //------------------------------------------------------------------------------
- function ColorSwap(WinColor: TColor): TColor32;
- {$IFDEF USENATIVECODE}
- var
- WCEn: TColor32Entry absolute WinColor;
- REn : TColor32Entry absolute Result;
- begin
- Result := WCEn.ARGB;
- REn.A := $FF;
- REn.R := WCEn.B;
- REn.B := WCEn.R;
- {$ELSE}
- {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- // EAX = WinColor
- // this function swaps R and B bytes in ABGR
- // and writes $FF into A component
- {$IFDEF TARGET_x64}
- MOV EAX,ECX
- {$ENDIF}
- BSWAP EAX
- MOV AL, $FF
- ROR EAX,8
- {$ENDIF}
- end;
- //------------------------------------------------------------------------------
- //
- // StackAlloc
- //
- //------------------------------------------------------------------------------
- {$IFDEF USESTACKALLOC}
- {$IFDEF PUREPASCAL}
- function StackAlloc(Size: Integer): Pointer;
- begin
- GetMem(Result, Size);
- end;
- procedure StackFree(P: Pointer);
- begin
- FreeMem(P);
- end;
- {$ELSE}
- { StackAlloc allocates a 'small' block of memory from the stack by
- decrementing SP. This provides the allocation speed of a local variable,
- but the runtime size flexibility of heap allocated memory.
- x64 implementation by Jameel Halabi
- }
- function StackAlloc(Size: Integer): Pointer; register; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- POP ECX // return address
- MOV EDX, ESP
- ADD EAX, 3
- AND EAX, not 3 // round up to keep ESP dword aligned
- CMP EAX, 4092
- JLE @@2
- @@1:
- SUB ESP, 4092
- PUSH EAX // make sure we touch guard page, to grow stack
- SUB EAX, 4096
- JNS @@1
- ADD EAX, 4096
- @@2:
- SUB ESP, EAX
- MOV EAX, ESP // function result = low memory address of block
- PUSH EDX // save original SP, for cleanup
- MOV EDX, ESP
- SUB EDX, 4
- PUSH EDX // save current SP, for sanity check (sp = [sp])
- PUSH ECX // return to caller
- {$ENDIF}
- {$IFDEF TARGET_x64}
- {$IFNDEF FPC}
- .NOFRAME
- {$ENDIF}
- POP R8 // return address
- MOV RDX, RSP // original SP
- ADD ECX, 15
- AND ECX, NOT 15 // round up to keep SP dqword aligned
- CMP ECX, 4088
- JLE @@2
- @@1:
- SUB RSP, 4088
- PUSH RCX // make sure we touch guard page, to grow stack
- SUB ECX, 4096
- JNS @@1
- ADD ECX, 4096
- @@2:
- SUB RSP, RCX
- MOV RAX, RSP // function result = low memory address of block
- PUSH RDX // save original SP, for cleanup
- MOV RDX, RSP
- SUB RDX, 8
- PUSH RDX // save current SP, for sanity check (sp = [sp])
- PUSH R8 // return to caller
- {$ENDIF}
- end;
- { StackFree pops the memory allocated by StackAlloc off the stack.
- - Calling StackFree is optional - SP will be restored when the calling routine
- exits, but it's a good idea to free the stack allocated memory ASAP anyway.
- - StackFree must be called in the same stack context as StackAlloc - not in
- a subroutine or finally block.
- - Multiple StackFree calls must occur in reverse order of their corresponding
- StackAlloc calls.
- - Built-in sanity checks guarantee that an improper call to StackFree will not
- corrupt the stack. Worst case is that the stack block is not released until
- the calling routine exits. }
- procedure StackFree(P: Pointer); register; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- POP ECX // return address
- MOV EDX, DWORD PTR [ESP]
- SUB EAX, 8
- CMP EDX, ESP // sanity check #1 (SP = [SP])
- JNE @Exit
- CMP EDX, EAX // sanity check #2 (P = this stack block)
- JNE @Exit
- MOV ESP, DWORD PTR [ESP+4] // restore previous SP
- @Exit:
- PUSH ECX // return to caller
- {$ENDIF}
- {$IFDEF TARGET_x64}
- {$IFNDEF FPC}
- .NOFRAME
- {$ENDIF}
- POP R8 // return address
- MOV RDX, QWORD PTR [RSP]
- SUB RCX, 16
- CMP RDX, RSP // sanity check #1 (SP = [SP])
- JNE @Exit
- CMP RDX, RCX // sanity check #2 (P = this stack block)
- JNE @Exit
- MOV RSP, QWORD PTR [RSP + 8] // restore previous SP
- @Exit:
- PUSH R8 // return to caller
- {$ENDIF}
- end;
- {$ENDIF}
- {$ENDIF}
- //------------------------------------------------------------------------------
- //
- // Bindings
- //
- //------------------------------------------------------------------------------
- procedure RegisterBindings;
- begin
- {$WARN SYMBOL_EXPERIMENTAL OFF}
- LowLevelRegistry := NewRegistry('GR32_LowLevel bindings');
- LowLevelRegistry.RegisterBinding(FID_FILLLONGWORD, @@FillLongWord, 'FillLongWord');
- LowLevelRegistry.RegisterBinding(FID_FAST_TRUNC, @@FastTrunc, 'FastTrunc');
- LowLevelRegistry.RegisterBinding(FID_FAST_ROUND, @@FastRound, 'FastRound');
- LowLevelRegistry.RegisterBinding(@@FastFloorSingle, 'FastFloorSingle');
- LowLevelRegistry.RegisterBinding(@@FastFloorDouble, 'FastFloorDouble');
- LowLevelRegistry.RegisterBinding(@@FastCeilSingle, 'FastCeilSingle');
- LowLevelRegistry.RegisterBinding(@@FastCeilDouble, 'FastCeilDouble');
- LowLevelRegistry[@@FillLongWord].Add( @FillLongWord_Pas, [isPascal]).Name := 'FillLongWord_Pas';
- LowLevelRegistry[@@FastTrunc].Add( @FastTrunc_Pas, [isPascal]).Name := 'FastTrunc_Pas';
- LowLevelRegistry[@@FastRound].Add( @FastRound_Pas, [isPascal]).Name := 'FastRound_Pas';
- LowLevelRegistry[@@FastFloorSingle].Add( @FastFloorSingle_Pas, [isPascal]).Name := 'FastFloorSingle_Pas';
- LowLevelRegistry[@@FastFloorDouble].Add( @FastFloorDouble_Pas, [isPascal]).Name := 'FastFloorDouble_Pas';
- LowLevelRegistry[@@FastCeilSingle].Add( @FastCeilSingle_Pas, [isPascal]).Name := 'FastCeilSingle_Pas';
- LowLevelRegistry[@@FastCeilDouble].Add( @FastCeilDouble_Pas, [isPascal]).Name := 'FastCeilDouble_Pas';
- {$if (not defined(PUREPASCAL))}
- LowLevelRegistry[@@FillLongWord].Add( @FillLongWord_ASM, [isAssembler]).Name := 'FillLongWord_ASM';
- {$ifend}
- {$if (not defined(PUREPASCAL)) and (not defined(OMIT_SSE2))}
- LowLevelRegistry[@@FillLongWord].Add( @FillLongword_SSE2, [isSSE2]).Name := 'FillLongword_SSE2';
- LowLevelRegistry[@@FastTrunc].Add( @FastTrunc_SSE2, [isSSE2]).Name := 'FastTrunc_SSE2';
- LowLevelRegistry[@@FastRound].Add( @FastRound_SSE41, [isSSE41]).Name := 'FastRound_SSE41';
- LowLevelRegistry[@@FastFloorSingle].Add( @FastFloorSingle_SSE41, [isSSE41]).Name := 'FastFloorSingle_SSE41';
- LowLevelRegistry[@@FastFloorDouble].Add( @FastFloorDouble_SSE41, [isSSE41]).Name := 'FastFloorDouble_SSE41';
- LowLevelRegistry[@@FastCeilSingle].Add( @FastCeilSingle_SSE41, [isSSE41]).Name := 'FastCeilSingle_SSE41';
- LowLevelRegistry[@@FastCeilDouble].Add( @FastCeilDouble_SSE41, [isSSE41]).Name := 'FastCeilDouble_SSE41';
- {$if defined(BENCHMARK)}
- LowLevelRegistry[@@FastTrunc].Add( @SlowTrunc_SSE2, [isSSE2], BindingPriorityWorse).Name := 'SlowTrunc_SSE2';
- {$ifend}
- {$ifend}
- {$if defined(BENCHMARK)}
- LowLevelRegistry[@@FastFloorSingle].Add( @System.Math.Floor, [isReference], BindingPriorityWorse).Name := 'Math.Floor';
- {$ifend}
- LowLevelRegistry.RebindAll;
- end;
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- initialization
- RegisterBindings;
- end.
|