12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521 |
- unit GR32_BlendASM;
- (* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1 or LGPL 2.1 with linking exception
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Alternatively, the contents of this file may be used under the terms of the
- * Free Pascal modified version of the GNU Lesser General Public License
- * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
- * of this license are applicable instead of those above.
- * Please see the file LICENSE.txt for additional information concerning this
- * license.
- *
- * The Original Code is Graphics32
- *
- * The Initial Developer of the Original Code is
- * Alex A. Denisov
- *
- * Portions created by the Initial Developer are Copyright (C) 2000-2009
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- * Christian-W. Budde
- * - 2019/04/01 - Refactoring
- *
- * ***** END LICENSE BLOCK ***** *)
- interface
- {$I GR32.inc}
- uses
- GR32;
- function BlendReg_ASM(F, B: TColor32): TColor32; // returns F blended over B, weighted by the alpha of F
- procedure BlendMem_ASM(F: TColor32; var B: TColor32); // as BlendReg_ASM, but the result is stored in B
- procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); // NOTE(review): signature suggests one color F blended into Count pixels at B - confirm against the implementation
- function BlendRegEx_ASM(F, B: TColor32; M: Cardinal): TColor32; // as BlendReg_ASM, with the alpha of F scaled by the master alpha M
- procedure BlendMemEx_ASM(F: TColor32; var B:TColor32; M: Cardinal); // as BlendRegEx_ASM, but the result is stored in B
- procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); // blends Count pixels read from Src over the pixels at Dst
- procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); // blends the single color Src over Count pixels at Dst
- function CombineReg_ASM(X, Y: TColor32; W: Cardinal): TColor32; // Z = W * X + (1 - W) * Y on all four channels
- procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: Cardinal); // presumably CombineReg_ASM with the result stored in Y - implementation not visible here, confirm
- {$IFDEF TARGET_x86}
- function MergeReg_ASM(F, B: TColor32): TColor32; // table-driven merge of F over B that also computes the result alpha (x86 only)
- {$ENDIF}
- procedure EMMS_ASM; // NOTE(review): implementation not visible here; by its name it resets MMX state - confirm
- implementation
- uses
- GR32_Blend,
- GR32_LowLevel,
- GR32_System;
- { ASM versions }
- const
- BlendRegistryPriorityASM = -256;
- { Assembler versions }
- const
- bias = $00800080; // $80 per 16-bit lane: added to each channel product before its high byte is taken, so the division by 256 rounds instead of truncating
- function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- { Blends the foreground color F over the background color B, weighted by the
-   alpha channel of F, and returns the blended color.
-   Fa = 255 returns F unchanged and Fa = 0 returns B unchanged; in every other
-   case the result is returned with its alpha byte forced to $FF.
-   Each color is split into two halves (00 r 00 b and 00 a 00 g) so that one
-   IMUL scales two channels at once. }
- // W is the 0..255 weight taken from Fa; "1 - W" is computed as W xor $FF
- // Result Z = Fa * Fargb + (1 - Fa) * Bargb
- // Result Z = P + Q
- {$IFDEF TARGET_x86}
- // EAX <- F
- // EDX <- B
- // Test Fa = 255 ?
- CMP EAX,$FF000000 // Fa = 255 ? => Result = EAX
- JNC @2 // unsigned EAX >= $FF000000 means the alpha byte is $FF
- // Test Fa = 0 ?
- TEST EAX,$FF000000 // Fa = 0 ? => Result = EDX
- JZ @1
- // Get weight W = Fa
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- SHR ECX,24 // ECX <- 00 00 00 Fa
- PUSH EBX // EBX is used as scratch and restored before RET
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias // round each lane before its high byte is kept
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias // round each lane before its high byte is kept
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV EBX,EDX // EBX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL EDX,ECX // EDX <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD EDX,bias // round each lane before its high byte is kept
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EBX,bias // round each lane before its high byte is kept
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- POP EBX
- RET
- @1: MOV EAX,EDX // Fa = 0: return B unchanged
- @2: // Fa = 255: EAX still holds F
- {$ENDIF}
- // x64 branch: ECX <- F (copied to EAX below)
- // EDX <- B (NOTE(review): register use matches the Win64 convention; other ABIs are not handled here)
- {$IFDEF TARGET_x64}
- MOV RAX, RCX // only the low 32 bits (EAX) are used from here on
- // Test Fa = 255 ?
- CMP EAX,$FF000000 // Fa = 255 ? => Result = EAX
- JNC @2
- // Test Fa = 0 ?
- TEST EAX,$FF000000 // Fa = 0 ? => Result = EDX
- JZ @1
- // Get weight W = Fa
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- SHR ECX,24 // ECX <- 00 00 00 Fa
- // P = W * F
- MOV R9D,EAX // R9D <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND R9D,$FF00FF00 // R9D <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR R9D,8 // R9D <- 00 Fa 00 Fg
- IMUL R9D,ECX // R9D <- Pa ** Pg **
- ADD EAX,bias // round each lane before its high byte is kept
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD R9D,bias // round each lane before its high byte is kept
- AND R9D,$FF00FF00 // R9D <- Pa 00 Pg 00
- OR EAX,R9D // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV R9D,EDX // R9D <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND R9D,$FF00FF00 // R9D <- Ba 00 Bg 00
- IMUL EDX,ECX // EDX <- Qr ** Qb **
- SHR R9D,8 // R9D <- 00 Ba 00 Bg
- IMUL R9D,ECX // R9D <- Qa ** Qg **
- ADD EDX,bias // round each lane before its high byte is kept
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD R9D,bias // round each lane before its high byte is kept
- AND R9D,$FF00FF00 // R9D <- Qa 00 Qg 00
- OR R9D,EDX // R9D <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,R9D // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- RET
- @1: MOV EAX,EDX // Fa = 0: return B unchanged
- @2: // Fa = 255: EAX still holds F
- {$ENDIF}
- end;
- procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- {$IFDEF TARGET_x86}
- { Blends the foreground color F over the pixel B in memory, weighted by the
-   alpha channel of F. Nothing is written when Fa = 0, F is stored as-is when
-   Fa = 255, otherwise the blended color is stored with alpha forced to $FF.
-   EAX <- F }
- // [EDX] <- B
- // Test Fa = 0 ?
- TEST EAX,$FF000000 // Fa = 0 ? => do not write
- JZ @2
- // Get weight W = Fa
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- SHR ECX,24 // ECX <- 00 00 00 Fa
- // Test Fa = 255 ?
- CMP ECX,$FF
- JZ @1 // opaque: store F directly (EAX is still intact)
- PUSH EBX // EBX and ESI are used as scratch and restored before RET
- PUSH ESI
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias // add bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias // add bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- MOV ESI,[EDX] // ESI <- Ba Br Bg Bb (current background pixel)
- // W = 1 - W
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV EBX,ESI // EBX <- Ba Br Bg Bb
- AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL ESI,ECX // ESI <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD ESI,bias // add bias
- AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
- SHR ESI,8 // ESI <- 00 Qr 00 Qb
- ADD EBX,bias // add bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,ESI // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- MOV [EDX],EAX // store the blended pixel
- POP ESI
- POP EBX
- RET
- @1: MOV [EDX],EAX // Fa = 255: overwrite B with F
- @2: // Fa = 0: leave B untouched
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX <- F (NOTE(review): register use matches the Win64 convention; other ABIs are not handled here)
- // [RDX] <- B
- // Test Fa = 0 ?
- TEST ECX,$FF000000 // Fa = 0 ? => do not write
- JZ @2
- MOV EAX, ECX // EAX <- Fa Fr Fg Fb
- // Get weight W = Fa
- SHR ECX,24 // ECX <- 00 00 00 Fa
- // Test Fa = 255 ?
- CMP ECX,$FF
- JZ @1 // opaque: store F directly (EAX holds F)
- // P = W * F
- MOV R8D,EAX // R8D <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND R8D,$FF00FF00 // R8D <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR R8D,8 // R8D <- 00 Fa 00 Fg
- IMUL R8D,ECX // R8D <- Pa ** Pg **
- ADD EAX,bias // add bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD R8D,bias // add bias
- AND R8D,$FF00FF00 // R8D <- Pa 00 Pg 00
- OR EAX,R8D // EAX <- Pa Pr Pg Pb
- MOV R9D,[RDX] // R9D <- Ba Br Bg Bb (current background pixel)
- // W = 1 - W
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV R8D,R9D // R8D <- Ba Br Bg Bb
- AND R9D,$00FF00FF // R9D <- 00 Br 00 Bb
- AND R8D,$FF00FF00 // R8D <- Ba 00 Bg 00
- IMUL R9D,ECX // R9D <- Qr ** Qb **
- SHR R8D,8 // R8D <- 00 Ba 00 Bg
- IMUL R8D,ECX // R8D <- Qa ** Qg **
- ADD R9D,bias // add bias
- AND R9D,$FF00FF00 // R9D <- Qr 00 Qb 00
- SHR R9D,8 // R9D <- 00 Qr 00 Qb
- ADD R8D,bias // add bias
- AND R8D,$FF00FF00 // R8D <- Qa 00 Qg 00
- OR R8D,R9D // R8D <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,R8D // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- MOV [RDX],EAX // store the blended pixel
- RET
- @1: MOV [RDX],EAX // Fa = 255: overwrite B with F
- @2: // Fa = 0: leave B untouched
- {$ENDIF}
- end;
- procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- // Blends the single foreground color F over Count consecutive pixels
- // starting at B, weighted by the alpha channel of F.
- //   Count <= 0 or Fa = 0 : nothing is written
- //   Fa = 255             : every pixel is overwritten with F
- //   otherwise            : Z = Fa * Fargb + (1 - Fa) * Bargb, alpha forced to $FF
- // F is a color value, not a pointer: its weighted part P is computed once
- // before the loop and only the background term Q is evaluated per pixel.
- {$IFDEF TARGET_x86}
- // EAX <- F
- // EDX <- B
- // ECX <- Count
- TEST ECX,ECX
- JLE @Done // Count <= 0 ? => nothing to do
- TEST EAX,$FF000000
- JZ @Done // Fa = 0 ? => nothing to do
- PUSH EBX
- PUSH ESI
- PUSH EDI
- MOV EDI,EDX // EDI <- B (running pixel pointer)
- // Get weight W = Fa
- MOV ESI,EAX
- SHR ESI,24 // ESI <- 00 00 00 Fa
- CMP ESI,$FF
- JZ @Fill // Fa = 255 ? => plain fill with F
- // P = W * F (loop invariant)
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ESI // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ESI // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR ESI,$000000FF // ESI <- 1 - Fa
- @Loop:
- // Q = W * B
- MOV EDX,[EDI] // EDX <- Ba Br Bg Bb
- MOV EBX,EDX // EBX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL EDX,ESI // EDX <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ESI // EBX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EBX,EAX // EBX <- Za Zr Zg Zb
- OR EBX,$FF000000 // EBX <- FF Zr Zg Zb
- MOV [EDI],EBX
- ADD EDI,4
- DEC ECX
- JNZ @Loop
- JMP @Restore
- @Fill:
- MOV [EDI],EAX // opaque source: copy F
- ADD EDI,4
- DEC ECX
- JNZ @Fill
- @Restore:
- POP EDI
- POP ESI
- POP EBX
- @Done:
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX <- F
- // RDX <- B
- // R8D <- Count
- // (Win64 register convention assumed, as in the other x64 routines of this
- // unit; only volatile registers are used, so nothing has to be saved)
- TEST R8D,R8D
- JLE @Done // Count <= 0 ? => nothing to do
- TEST ECX,$FF000000
- JZ @Done // Fa = 0 ? => nothing to do
- // Get weight W = Fa
- MOV R9D,ECX
- SHR R9D,24 // R9D <- 00 00 00 Fa
- CMP R9D,$FF
- JZ @Fill // Fa = 255 ? => plain fill with F
- // P = W * F (loop invariant)
- MOV EAX,ECX // EAX <- Fa Fr Fg Fb
- AND ECX,$00FF00FF // ECX <- 00 Fr 00 Fb
- AND EAX,$FF00FF00 // EAX <- Fa 00 Fg 00
- IMUL ECX,R9D // ECX <- Pr ** Pb **
- SHR EAX,8 // EAX <- 00 Fa 00 Fg
- IMUL EAX,R9D // EAX <- Pa ** Pg **
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Pr 00 Pb 00
- SHR ECX,8 // ECX <- 00 Pr 00 Pb
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pa 00 Pg 00
- OR ECX,EAX // ECX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR R9D,$000000FF // R9D <- 1 - Fa
- @Loop:
- // Q = W * B
- MOV R10D,[RDX] // R10D <- Ba Br Bg Bb
- MOV EAX,R10D // EAX <- Ba Br Bg Bb
- AND R10D,$00FF00FF // R10D <- 00 Br 00 Bb
- AND EAX,$FF00FF00 // EAX <- Ba 00 Bg 00
- IMUL R10D,R9D // R10D <- Qr ** Qb **
- SHR EAX,8 // EAX <- 00 Ba 00 Bg
- IMUL EAX,R9D // EAX <- Qa ** Qg **
- ADD R10D,bias
- AND R10D,$FF00FF00 // R10D <- Qr 00 Qb 00
- SHR R10D,8 // R10D <- 00 Qr 00 Qb
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Qa 00 Qg 00
- OR EAX,R10D // EAX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- MOV [RDX],EAX
- ADD RDX,4
- DEC R8D
- JNZ @Loop
- JMP @Done
- @Fill:
- MOV [RDX],ECX // opaque source: copy F (ECX is still unmodified here)
- ADD RDX,4
- DEC R8D
- JNZ @Fill
- @Done:
- {$ENDIF}
- end;
- function BlendRegEx_ASM(F, B: TColor32; M: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- { Blends the foreground color F over the background color B and returns the
-   result; the weight is the alpha of F scaled by the master alpha M:
-   W = (Fa * (M + 1)) shr 8.
-   B is returned unchanged when Fa = 0 or W = 0; otherwise the blended color
-   is returned with its alpha byte forced to $FF. }
- // using alpha channel value of F multiplied by master alpha (M)
- // no checking for M = $FF, in this case Graphics32 uses BlendReg
- // Result Z = Fa * M * Fargb + (1 - Fa * M) * Bargb
- // Result Z = P + Q
- // x86: EAX <- F          x64: ECX <- F
- // x86: EDX <- B          x64: EDX <- B
- // x86: ECX <- M          x64: R8D <- M
- {$IFDEF TARGET_x86}
- // Check Fa > 0 ?
- TEST EAX,$FF000000 // Fa = 0? => Result := EDX
- JZ @2
- PUSH EBX // EBX is used as scratch; restored on both exit paths
- // Get weight W = Fa * M
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- INC ECX // 255:256 range bias
- SHR EBX,24 // EBX <- 00 00 00 Fa
- IMUL ECX,EBX // ECX <- 00 00 W **
- SHR ECX,8 // ECX <- 00 00 00 W
- JZ @1 // W = 0 ? => Result := EDX
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias // round each lane before its high byte is kept
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias // round each lane before its high byte is kept
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV EBX,EDX // EBX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL EDX,ECX // EDX <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD EDX,bias // round each lane before its high byte is kept
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EBX,bias // round each lane before its high byte is kept
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- POP EBX
- RET
- @1: // W = 0: undo the PUSH, then return B
- POP EBX
- @2: MOV EAX,EDX // return B unchanged
- {$ENDIF}
- {$IFDEF TARGET_x64}
- MOV EAX,ECX // EAX <- Fa Fr Fg Fb
- TEST EAX,$FF000000 // Fa = 0? => Result := EDX
- JZ @1
- // Get weight W = Fa * M
- INC R8D // 255:256 range bias
- SHR ECX,24 // ECX <- 00 00 00 Fa
- IMUL R8D,ECX // R8D <- 00 00 W **
- SHR R8D,8 // R8D <- 00 00 00 W
- JZ @1 // W = 0 ? => Result := EDX
- // P = W * F
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND ECX,$FF00FF00 // ECX <- Fa 00 Fg 00
- IMUL EAX,R8D // EAX <- Pr ** Pb **
- SHR ECX,8 // ECX <- 00 Fa 00 Fg
- IMUL ECX,R8D // ECX <- Pa ** Pg **
- ADD EAX,bias // round each lane before its high byte is kept
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD ECX,bias // round each lane before its high byte is kept
- AND ECX,$FF00FF00 // ECX <- Pa 00 Pg 00
- OR EAX,ECX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR R8D,$000000FF // R8D <- 1 - R8D
- // Q = W * B
- MOV ECX,EDX // ECX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND ECX,$FF00FF00 // ECX <- Ba 00 Bg 00
- IMUL EDX,R8D // EDX <- Qr ** Qb **
- SHR ECX,8 // ECX <- 00 Ba 00 Bg
- IMUL ECX,R8D // ECX <- Qa ** Qg **
- ADD EDX,bias // round each lane before its high byte is kept
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD ECX,bias // round each lane before its high byte is kept
- AND ECX,$FF00FF00 // ECX <- Qa 00 Qg 00
- OR ECX,EDX // ECX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- RET
- @1: MOV EAX,EDX // Fa = 0 or W = 0: return B unchanged
- {$ENDIF}
- end;
- procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- // Blends the foreground color F over the pixel B in memory; the weight is
- // the alpha of F scaled by the master alpha M:
- //   W = (Fa * (M + 1)) shr 8
- //   Z = W * Fargb + (1 - W) * Bargb, stored to B with alpha forced to $FF
- // B is left untouched when Fa = 0 or W = 0.
- // Both branches compute W the same way and both force the stored alpha.
- {$IFDEF TARGET_x86}
- // EAX <- F
- // [EDX] <- B
- // ECX <- M
- // Check Fa > 0 ?
- TEST EAX,$FF000000 // Fa = 0? => write nothing
- JZ @2
- PUSH EBX
- // Get weight W = Fa * M
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- SHR EBX,24 // EBX <- 00 00 00 Fa
- INC ECX // 255:256 range bias for M
- IMUL ECX,EBX // ECX <- 00 00 W **
- SHR ECX,8 // ECX <- 00 00 00 W
- JZ @1 // W = 0 ? => write nothing
- PUSH ESI
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- MOV ESI,[EDX] // ESI <- Ba Br Bg Bb
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV EBX,ESI // EBX <- Ba Br Bg Bb
- AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL ESI,ECX // ESI <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD ESI,bias
- AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
- SHR ESI,8 // ESI <- 00 Qr 00 Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,ESI // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- MOV [EDX],EAX
- POP ESI
- @1: POP EBX
- @2:
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX <- F
- // [RDX] <- B
- // R8D <- M
- // Check Fa > 0 ?
- TEST ECX,$FF000000 // Fa = 0? => write nothing
- JZ @1
- // Get weight W = Fa * M
- MOV EAX,ECX // EAX <- Fa Fr Fg Fb
- INC R8D // 255:256 range bias
- SHR EAX,24 // EAX <- 00 00 00 Fa
- IMUL R8D,EAX // R8D <- 00 00 W **
- // no bias is added here: W must stay within 0..255 (as in the x86 branch),
- // and the SHR below has to set ZF for the W = 0 test
- SHR R8D,8 // R8D <- 00 00 00 W
- JZ @1 // W = 0 ? => write nothing
- // P = W * F
- MOV EAX,ECX // EAX <- Fa Fr Fg Fb
- AND ECX,$00FF00FF // ECX <- 00 Fr 00 Fb
- AND EAX,$FF00FF00 // EAX <- Fa 00 Fg 00
- IMUL ECX,R8D // ECX <- Pr ** Pb **
- SHR EAX,8 // EAX <- 00 Fa 00 Fg
- IMUL EAX,R8D // EAX <- Pa ** Pg **
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Pr 00 Pb 00
- SHR ECX,8 // ECX <- 00 Pr 00 Pb
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pa 00 Pg 00
- OR ECX,EAX // ECX <- Pa Pr Pg Pb
- // W = 1 - W
- MOV R9D,[RDX] // R9D <- Ba Br Bg Bb
- XOR R8D,$000000FF // R8D <- 1 - R8D
- // Q = W * B
- MOV EAX,R9D // EAX <- Ba Br Bg Bb
- AND R9D,$00FF00FF // R9D <- 00 Br 00 Bb
- AND EAX,$FF00FF00 // EAX <- Ba 00 Bg 00
- IMUL R9D,R8D // R9D <- Qr ** Qb **
- SHR EAX,8 // EAX <- 00 Ba 00 Bg
- IMUL EAX,R8D // EAX <- Qa ** Qg **
- ADD R9D,bias
- AND R9D,$FF00FF00 // R9D <- Qr 00 Qb 00
- SHR R9D,8 // R9D <- 00 Qr 00 Qb
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Qa 00 Qg 00
- OR EAX,R9D // EAX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD ECX,EAX // ECX <- Za Zr Zg Zb
- OR ECX,$FF000000 // ECX <- FF Zr Zg Zb (same as the x86 branch)
- MOV [RDX],ECX
- @1:
- {$ENDIF}
- end;
- procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- // Blends Count pixels read from Src over the pixels at Dst, one by one,
- // each weighted by the alpha channel of its source pixel:
- //   Fa = 0   : destination pixel left untouched
- //   Fa = 255 : destination pixel overwritten with the source pixel
- //   otherwise: Z = Fa * Fargb + (1 - Fa) * Bargb, alpha forced to $FF
- // Nothing is read or written when Count <= 0.
- {$IFDEF TARGET_x86}
- // EAX <- Src
- // EDX <- Dst
- // ECX <- Count
- // test the counter for zero or negativity
- TEST ECX,ECX
- JLE @4 // JLE (not JS): Count = 0 must not enter the DEC/JNZ loop
- PUSH EBX
- PUSH ESI
- PUSH EDI
- MOV ESI,EAX // ESI <- Src
- MOV EDI,EDX // EDI <- Dst
- // loop start
- @1: MOV EAX,[ESI]
- TEST EAX,$FF000000
- JZ @3 // complete transparency, proceed to next point
- PUSH ECX // store counter
- // Get weight W = Fa
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- SHR ECX,24 // ECX <- 00 00 00 Fa
- // Test Fa = 255 ?
- CMP ECX,$FF
- JZ @2 // complete opacity, store the source pixel as-is
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- MOV EDX,[EDI] // EDX <- Ba Br Bg Bb
- XOR ECX,$000000FF // ECX <- 1 - ECX
- // Q = W * B
- MOV EBX,EDX // EBX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL EDX,ECX // EDX <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- @2:
- MOV [EDI],EAX
- POP ECX // restore counter
- @3:
- ADD ESI,4
- ADD EDI,4
- // loop end
- DEC ECX
- JNZ @1
- POP EDI
- POP ESI
- POP EBX
- @4:
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // RCX <- Src
- // RDX <- Dst
- // R8 <- Count
- // test the counter for zero or negativity
- TEST R8D,R8D
- JLE @4 // JLE (not JS): Count = 0 must not enter the DEC/JNZ loop
- MOV R10,RCX // R10 <- Src
- MOV R11,RDX // R11 <- Dst
- MOV ECX,R8D // RCX <- Count
- // loop start
- @1:
- MOV EAX,[R10]
- TEST EAX,$FF000000
- JZ @3 // complete transparency, proceed to next point
- // Get weight W = Fa
- MOV R9D,EAX // R9D <- Fa Fr Fg Fb
- SHR R9D,24 // R9D <- 00 00 00 Fa
- // Test Fa = 255 ?
- CMP R9D,$FF
- JZ @2 // complete opacity, store the source pixel as-is
- // P = W * F
- MOV R8D,EAX // R8D <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND R8D,$FF00FF00 // R8D <- Fa 00 Fg 00
- IMUL EAX,R9D // EAX <- Pr ** Pb **
- SHR R8D,8 // R8D <- 00 Fa 00 Fg
- IMUL R8D,R9D // R8D <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD R8D,bias
- AND R8D,$FF00FF00 // R8D <- Pa 00 Pg 00
- OR EAX,R8D // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- MOV EDX,[R11] // EDX <- Ba Br Bg Bb
- XOR R9D,$000000FF // R9D <- 1 - R9D
- // Q = W * B
- MOV R8D,EDX // R8D <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND R8D,$FF00FF00 // R8D <- Ba 00 Bg 00
- IMUL EDX,R9D // EDX <- Qr ** Qb **
- SHR R8D,8 // R8D <- 00 Ba 00 Bg
- IMUL R8D,R9D // R8D <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD R8D,bias
- AND R8D,$FF00FF00 // R8D <- Qa 00 Qg 00
- OR R8D,EDX // R8D <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,R8D // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- @2:
- MOV [R11],EAX
- @3:
- ADD R10,4
- ADD R11,4
- // loop end
- DEC ECX
- JNZ @1
- @4:
- {$ENDIF}
- end;
- procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- // Blends the single color Src over Count consecutive pixels at Dst,
- // weighted by the alpha channel of Src:
- //   Count <= 0 or Fa = 0 : nothing is written
- //   Fa = 255             : every pixel is overwritten with Src
- //   otherwise            : Z = Fa * Fargb + (1 - Fa) * Bargb, alpha forced to $FF
- // The source term P is computed once; only Q is evaluated per pixel.
- // The early exits leave before any register is pushed, so they must not
- // pass through the copy loop, which pops the saved registers on its way out.
- {$IFDEF TARGET_x86}
- // EAX <- Src
- // EDX <- Dst
- // ECX <- Count
- // test the counter for zero or negativity
- TEST ECX,ECX
- JLE @3 // nothing pushed yet: leave through the plain RET
- // test if source is fully transparent
- TEST EAX,$FF000000
- JZ @3 // nothing pushed yet: leave through the plain RET
- PUSH EBX
- PUSH ESI
- PUSH EDI
- MOV ESI,EAX // ESI <- Src
- MOV EDI,EDX // EDI <- Dst
- // Get weight W = Fa
- SHR ESI, 24 // ESI <- W
- // test if source is fully opaque
- CMP ESI,$FF
- JZ @4 // registers are saved and EDI is set: copy loop is safe here
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ESI // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ESI // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- XOR ESI,$000000FF // ESI <- 1 - Fa
- // loop start
- @1:
- MOV EDX,[EDI]
- MOV EBX,EDX // EBX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL EDX,ESI // EDX <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ESI // EBX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EBX,EAX // EBX <- Za Zr Zg Zb
- OR EBX,$FF000000 // EBX <- FF Zr Zg Zb
- MOV [EDI],EBX
- ADD EDI,4
- // loop end
- DEC ECX
- JNZ @1
- POP EDI
- POP ESI
- POP EBX
- @3:
- RET
- @4:
- // just copy source
- MOV [EDI],EAX
- ADD EDI,4
- DEC ECX
- JNZ @4
- POP EDI
- POP ESI
- POP EBX
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX <- Src
- // RDX <- Dst
- // R8D <- Count
- // test the counter for zero or negativity
- TEST R8D,R8D // R8D <- Count
- JLE @2
- // test if source is fully transparent
- TEST ECX,$FF000000
- JZ @2
- PUSH RDI
- MOV RDI,RDX // RDI <- Dst
- MOV R9D,ECX // R9D <- Src
- // Get weight W = Fa
- SHR R9D,24 // R9D <- W
- // Test Fa = 255 ?
- CMP R9D,$FF
- JZ @3 // complete opaque, copy source (ECX still holds Src)
- // P = W * F
- MOV EAX,ECX // EAX <- Fa Fr Fg Fb
- AND ECX,$00FF00FF // ECX <- 00 Fr 00 Fb
- AND EAX,$FF00FF00 // EAX <- Fa 00 Fg 00
- IMUL ECX,R9D // ECX <- Pr ** Pb **
- SHR EAX,8 // EAX <- 00 Fa 00 Fg
- IMUL EAX,R9D // EAX <- Pa ** Pg **
- ADD ECX,Bias
- AND ECX,$FF00FF00 // ECX <- Pr 00 Pb 00
- SHR ECX,8 // ECX <- 00 Pr 00 Pb
- ADD EAX,Bias
- AND EAX,$FF00FF00 // EAX <- Pa 00 Pg 00
- OR ECX,EAX // ECX <- Pa Pr Pg Pb
- XOR R9D,$000000FF // R9D <- 1 - Fa
- // loop start
- @1:
- MOV EDX,[RDI]
- MOV EAX,EDX // EAX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
- AND EAX,$FF00FF00 // EAX <- Ba 00 Bg 00
- IMUL EDX,R9D // EDX <- Qr ** Qb **
- SHR EAX,8 // EAX <- 00 Ba 00 Bg
- IMUL EAX,R9D // EAX <- Qa ** Qg **
- ADD EDX,Bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EAX,Bias
- AND EAX,$FF00FF00 // EAX <- Qa 00 Qg 00
- OR EAX,EDX // EAX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- Za Zr Zg Zb
- OR EAX,$FF000000 // EAX <- FF Zr Zg Zb
- MOV [RDI],EAX
- ADD RDI,4
- // loop end
- DEC R8D
- JNZ @1
- POP RDI
- @2:
- RET
- @3:
- // just copy source
- MOV [RDI],ECX
- ADD RDI,4
- DEC R8D
- JNZ @3
- POP RDI
- {$ENDIF}
- end;
- {$IFDEF TARGET_x86}
- function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- { Merges the foreground color F over the background color B and returns the
-   merged color, including a computed result alpha.
-   This is an implementation of the merge formula, as described
- in a paper by Bruce Wallace in 1981. Merging is associative,
- that is, A over (B over C) = (A over B) over C. The formula is,
- Ra = Fa + Ba * (1 - Fa)
- Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra
- where
- Rc is the resultant color,
- Ra is the resultant alpha,
- Fc is the foreground color,
- Fa is the foreground alpha,
- Bc is the background color,
- Ba is the background alpha.
-   Shortcuts taken before the table-driven path:
-   F.A = 0 returns B; B.A = 255 calls BlendReg and forces alpha to $FF;
-   F.A = 255 or B.A = 0 returns F.
-   The general path uses 12 bytes of stack: F at ESP+0, B at ESP+4 and the
-   result at ESP+8, each accessed byte-wise (offset +0 blue, +1 green,
-   +2 red, +3 alpha, as the per-channel comments below indicate).
-   DivTable and RcTable are declared outside this block; they are indexed
-   here as 256-byte rows (row index shifted left by 8).
- }
- // EAX <- F
- // EDX <- B
- // if F.A = 0 then
- TEST EAX,$FF000000
- JZ @exit0 // result is B
- // else if B.A = 255 then
- CMP EDX,$FF000000
- JNC @blend // opaque background: an ordinary blend is enough
- // else if F.A = 255 then
- CMP EAX,$FF000000
- JNC @Exit // result is F (already in EAX)
- // else if B.A = 0 then
- TEST EDX,$FF000000
- JZ @Exit // result is F (already in EAX)
- @4:
- PUSH EBX
- PUSH ESI
- PUSH EDI
- ADD ESP,-$0C // reserve 12 bytes: F, B and Result
- MOV [ESP+$04],EDX // [ESP+4..7] <- B
- MOV [ESP],EAX // [ESP+0..3] <- F
- // AH <- F.A
- // DL, CL <- B.A
- SHR EAX,16
- AND EAX,$0000FF00 // EAX <- F.A shl 8 (row offset into DivTable)
- SHR EDX,24 // EDX <- B.A
- MOV CL,DL // only CL is set; the upper bytes of ECX are not cleared here
- NOP // NOTE(review): purpose of the three NOPs is not evident from this code
- NOP
- NOP
- // EDI <- PF
- // EDX <- PB
- // ESI <- PR
- // PF := @DivTable[F.A];
- LEA EDI,[EAX+DivTable]
- // PB := @DivTable[B.A];
- SHL EDX,$08
- LEA EDX,[EDX+DivTable]
- // Result.A := B.A + F.A - PB[F.A];
- SHR EAX,8 // EAX <- F.A
- ADD ECX,EAX
- SUB ECX,[EDX+EAX] // 32-bit subtract; only the low byte (CL) is used below
- MOV [ESP+$0B],CL // Result.A
- // PR := @RcTable[Result.A];
- SHL ECX,$08
- AND ECX,$0000FFFF // ECX <- Result.A shl 8 (clears the stale upper bits)
- LEA ESI,[ECX+RcTable]
- { Red component }
- // Result.R := PB[B.R];
- XOR EAX,EAX
- MOV AL,[ESP+$06] // AL <- B.R
- MOV CL,[EDX+EAX]
- MOV [ESP+$0a],CL
- // X := F.R - Result.R;
- MOV AL,[ESP+$02] // AL <- F.R
- XOR EBX,EBX
- MOV BL,CL
- SUB EAX,EBX
- // if X >= 0 then
- JL @5
- // Result.R := PR[PF[X] + Result.R]
- MOVZX EAX,BYTE PTR[EDI+EAX]
- AND ECX,$000000FF // ECX <- Result.R
- ADD EAX,ECX
- MOV AL,[ESI+EAX]
- MOV [ESP+$0A],AL
- JMP @6
- @5:
- // Result.R := PR[Result.R - PF[-X]];
- NEG EAX
- MOVZX EAX,BYTE PTR[EDI+EAX]
- XOR ECX,ECX
- MOV CL,[ESP+$0A]
- SUB ECX,EAX
- MOV AL,[ESI+ECX]
- MOV [ESP+$0A],AL
- { Green component }
- @6:
- // Result.G := PB[B.G];
- XOR EAX,EAX
- MOV AL,[ESP+$05] // AL <- B.G
- MOV CL,[EDX+EAX]
- MOV [ESP+$09],CL
- // X := F.G - Result.G;
- MOV AL,[ESP+$01] // AL <- F.G
- XOR EBX,EBX
- MOV BL,CL
- SUB EAX,EBX
- // if X >= 0 then
- JL @7
- // Result.G := PR[PF[X] + Result.G]
- MOVZX EAX,BYTE PTR[EDI+EAX]
- AND ECX,$000000FF // ECX <- Result.G
- ADD EAX,ECX
- MOV AL,[ESI+EAX]
- MOV [ESP+$09],AL
- JMP @8
- @7:
- // Result.G := PR[Result.G - PF[-X]];
- NEG EAX
- MOVZX EAX,BYTE PTR[EDI+EAX]
- XOR ECX,ECX
- MOV CL,[ESP+$09]
- SUB ECX,EAX
- MOV AL,[ESI+ECX]
- MOV [ESP+$09],AL
- { Blue component }
- @8:
- // Result.B := PB[B.B];
- XOR EAX,EAX
- MOV AL,[ESP+$04] // AL <- B.B
- MOV CL,[EDX+EAX] // last use of EDX as PB; it becomes scratch below
- MOV [ESP+$08],CL
- // X := F.B - Result.B;
- MOV AL,[ESP] // AL <- F.B
- XOR EDX,EDX
- MOV DL,CL
- SUB EAX,EDX
- // if X >= 0 then
- JL @9
- // Result.B := PR[PF[X] + Result.B]
- MOVZX EAX,BYTE PTR[EDI+EAX]
- XOR EDX,EDX
- MOV DL,CL // EDX <- Result.B
- ADD EAX,EDX
- MOV AL,[ESI+EAX]
- MOV [ESP+$08],AL
- JMP @10
- @9:
- // Result.B := PR[Result.B - PF[-X]];
- NEG EAX
- MOVZX EAX,BYTE PTR[EDI+EAX]
- XOR EDX,EDX
- MOV DL,CL // EDX <- Result.B
- SUB EDX,EAX
- MOV AL,[ESI+EDX]
- MOV [ESP+$08],AL
- @10:
- // EAX <- Result
- MOV EAX,[ESP+$08]
- // end;
- ADD ESP,$0C // release the 12 bytes of scratch space
- POP EDI
- POP ESI
- POP EBX
- RET
- @blend:
- CALL DWORD PTR [BlendReg] // indirect call through BlendReg with F in EAX and B in EDX
- OR EAX,$FF000000 // background was opaque, so the result is opaque
- RET
- @exit0:
- MOV EAX,EDX // F.A = 0: return B
- @Exit:
- end;
- {$ENDIF}
- function CombineReg_ASM(X, Y: TColor32; W: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm
- // Combines the RGBA channels of colors X and Y with the weight of X given in W and returns the combined color in EAX.
- // Result Z = W * X + (1 - W) * Y (all channels are combined, including alpha); "1 - W" is computed as W xor $FF, each product has `bias` added and only its high byte is kept. W = 0 and W = $FF are short-circuited.
- {$IFDEF TARGET_x86}
- // EAX <- X
- // EDX <- Y
- // ECX <- W
- // W = 0 or $FF?
- JCXZ @1 // CX = 0 ? => Result := EDX (Y). NOTE(review): JCXZ inspects only the 16-bit CX - presumably W is always within 0..$FF; confirm
- CMP ECX,$FF // W = $FF ? => Result := EAX (X, still untouched at this point)
- JE @2
- PUSH EBX
- // P = W * X
- MOV EBX,EAX // EBX <- Xa Xr Xg Xb
- AND EAX,$00FF00FF // EAX <- 00 Xr 00 Xb
- AND EBX,$FF00FF00 // EBX <- Xa 00 Xg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Xa 00 Xg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR ECX,$000000FF // ECX <- $FF - ECX for ECX in 0..$FF (the "1 - W" weight)
- MOV EBX,EDX // EBX <- Ya Yr Yg Yb
- // Q = W * Y
- AND EDX,$00FF00FF // EDX <- 00 Yr 00 Yb
- AND EBX,$FF00FF00 // EBX <- Ya 00 Yg 00
- IMUL EDX,ECX // EDX <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ya 00 Yg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- POP EBX
- RET
- @1: MOV EAX,EDX // W = 0: the result is Y
- @2: // W = $FF: EAX still holds X; control falls through to the end of the asm block
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX <- X
- // EDX <- Y
- // R8D <- W
- // W = 0 or $FF?
- TEST R8D,R8D
- JZ @1 // W = 0 ? => Result := EDX
- MOV EAX,ECX // EAX <- Xa Xr Xg Xb
- CMP R8B,$FF // W = $FF ? => Result := EAX (X, copied from ECX just above)
- JE @2
- // P = W * X
- AND EAX,$00FF00FF // EAX <- 00 Xr 00 Xb
- AND ECX,$FF00FF00 // ECX <- Xa 00 Xg 00
- IMUL EAX,R8D // EAX <- Pr ** Pb **
- SHR ECX,8 // ECX <- 00 Xa 00 Xg
- IMUL ECX,R8D // ECX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Pa 00 Pg 00
- OR EAX,ECX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- XOR R8D,$000000FF // R8D <- $FF - R8D for R8D in 0..$FF (the "1 - W" weight)
- MOV ECX,EDX // ECX <- Ya Yr Yg Yb
- // Q = W * Y
- AND EDX,$00FF00FF // EDX <- 00 Yr 00 Yb
- AND ECX,$FF00FF00 // ECX <- Ya 00 Yg 00
- IMUL EDX,R8D // EDX <- Qr ** Qb **
- SHR ECX,8 // ECX <- 00 Ya 00 Yg
- IMUL ECX,R8D // ECX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr 00 Qb
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Qa 00 Qg 00
- OR ECX,EDX // ECX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- Za Zr Zg Zb
- RET
- @1: MOV EAX,EDX // W = 0: the result is Y
- @2: // W = $FF: EAX already holds X; control falls through to the end of the asm block
- {$ENDIF}
- end;
- procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm // Combines color X (called F below) with the color stored at Y (called B below) using weight W for F, and writes the result back to Y: Z = W * F + (W xor $FF) * B per channel, alpha included. W = 0 writes nothing; W = 255 stores F as-is.
- {$IFDEF TARGET_x86}
- // EAX <- F
- // [EDX] <- B
- // ECX <- W
- // Check W
- JCXZ @1 // W = 0 ? => write nothing. NOTE(review): JCXZ inspects only the 16-bit CX - presumably W is always within 0..$FF; confirm
- CMP ECX,$FF // W = 255? => write F
- {$IFDEF FPC}
- DB $74,$76 // Prob with FPC 2.2.2 and below: hand-encoded short JZ (opcode $74, displacement $76) standing in for the `JZ @2` of the other branch. NOTE(review): the displacement is hard-coded, so it must be re-verified whenever the code between here and @2 changes size
- {$ELSE}
- JZ @2
- {$ENDIF}
- PUSH EBX
- PUSH ESI
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- MOV ESI,[EDX] // ESI <- Ba Br Bg Bb (current value of Y)
- XOR ECX,$000000FF // ECX <- $FF - ECX for ECX in 0..$FF (the "1 - W" weight)
- // Q = W * B
- MOV EBX,ESI // EBX <- Ba Br Bg Bb
- AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL ESI,ECX // ESI <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD ESI,bias
- AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
- SHR ESI,8 // ESI <- 00 Qr 00 Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,ESI // EBX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
- MOV [EDX],EAX // store the combined color back into Y
- POP ESI
- POP EBX
- @1: RET // shared return: reached after the combined store, or directly when W = 0 (Y left unchanged)
- @2: MOV [EDX],EAX // W = 255: store F unmodified
- {$ENDIF}
- {$IFDEF TARGET_x64}
- // ECX <- F
- // [RDX] <- B
- // R8 <- W
- // Check W
- TEST R8D,R8D // Set flags for R8D
- JZ @2 // W = 0 ? => write nothing
- MOV EAX,ECX // EAX <- Fa Fr Fg Fb
- CMP R8B,$FF // W = 255? => write F
- JZ @1
- // P = W * F
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND ECX,$FF00FF00 // ECX <- Fa 00 Fg 00
- IMUL EAX,R8D // EAX <- Pr ** Pb **
- SHR ECX,8 // ECX <- 00 Fa 00 Fg
- IMUL ECX,R8D // ECX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Pa 00 Pg 00
- OR EAX,ECX // EAX <- Pa Pr Pg Pb
- // W = 1 - W
- MOV R9D,[RDX] // R9D <- Ba Br Bg Bb (current value of Y)
- XOR R8D,$000000FF // R8D <- $FF - R8D for R8D in 0..$FF (the "1 - W" weight)
- // Q = W * B
- MOV ECX,R9D // ECX <- Ba Br Bg Bb
- AND R9D,$00FF00FF // R9D <- 00 Br 00 Bb
- AND ECX,$FF00FF00 // ECX <- Ba 00 Bg 00
- IMUL R9D,R8D // R9D <- Qr ** Qb **
- SHR ECX,8 // ECX <- 00 Ba 00 Bg
- IMUL ECX,R8D // ECX <- Qa ** Qg **
- ADD R9D,bias
- AND R9D,$FF00FF00 // R9D <- Qr 00 Qb 00
- SHR R9D,8 // R9D <- 00 Qr 00 Qb
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Qa 00 Qg 00
- OR ECX,R9D // ECX <- Qa Qr Qg Qb
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- Za Zr Zg Zb
- @1: MOV [RDX],EAX // store into Y: the combined color, or F itself when W = 255
- @2: // W = 0: Y is left unchanged
- {$ENDIF}
- end;
- procedure EMMS_ASM; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
- asm // Empty body: this asm block contains no instructions. It is what RegisterBindingFunctions registers under FID_EMMS. NOTE(review): presumably a no-op stand-in because these routines keep no MMX state to clear - confirm
- end;
- procedure RegisterBindingFunctions;
- {$IFNDEF PUREPASCAL}
- // Adds one routine of this unit to BlendRegistry. Every registration made
- // here passes the same trailing arguments: [], 0 and BlendRegistryPriorityASM.
- // NOTE(review): the parameter types assume BlendRegistry.Add takes an Integer
- // function id and an untyped Pointer - confirm against its declaration.
- procedure BindASM(FunctionID: Integer; Proc: Pointer);
- begin
- BlendRegistry.Add(FunctionID, Proc, [], 0, BlendRegistryPriorityASM);
- end;
- {$ENDIF}
- begin
- {$IFNDEF PUREPASCAL}
- // Combine / EMMS bindings
- BindASM(FID_EMMS, @EMMS_ASM);
- BindASM(FID_COMBINEREG, @CombineReg_ASM);
- BindASM(FID_COMBINEMEM, @CombineMem_ASM);
- // Blend bindings
- BindASM(FID_BLENDREG, @BlendReg_ASM);
- BindASM(FID_BLENDMEM, @BlendMem_ASM);
- BindASM(FID_BLENDMEMS, @BlendMems_ASM);
- BindASM(FID_BLENDREGEX, @BlendRegEx_ASM);
- BindASM(FID_BLENDMEMEX, @BlendMemEx_ASM);
- BindASM(FID_BLENDLINE, @BlendLine_ASM);
- BindASM(FID_BLENDLINE1, @BlendLine1_ASM);
- {$IFNDEF TARGET_x64}
- // MergeReg_ASM is only registered when not targeting x64
- BindASM(FID_MERGEREG, @MergeReg_ASM);
- {$ENDIF}
- {$ENDIF}
- end;
- initialization // unit initialization section: its only statement calls RegisterBindingFunctions
- RegisterBindingFunctions;
- end.
|