unit GR32_BlendASM;

(* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1 or LGPL 2.1 with linking exception
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * Free Pascal modified version of the GNU Lesser General Public License
 * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
 * of this license are applicable instead of those above.
 * Please see the file LICENSE.txt for additional information concerning this
 * license.
 *
 * The Original Code is Graphics32
 *
 * The Initial Developer of the Original Code is
 * Alex A. Denisov
 *
 * Portions created by the Initial Developer are Copyright (C) 2000-2009
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Christian-W. Budde
 *     - 2019/04/01 - Refactoring
 *
 * ***** END LICENSE BLOCK ***** *)

interface

{$I GR32.inc}

uses
  GR32;

function BlendReg_ASM(F, B: TColor32): TColor32;
procedure BlendMem_ASM(F: TColor32; var B: TColor32);
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer);
function BlendRegEx_ASM(F, B: TColor32; M: Cardinal): TColor32;
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: Cardinal);
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer);
procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer);
function CombineReg_ASM(X, Y: TColor32; W: Cardinal): TColor32;
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: Cardinal);
{$IFDEF TARGET_x86}
function MergeReg_ASM(F, B: TColor32): TColor32;
{$ENDIF}
procedure EMMS_ASM;

implementation

uses
  GR32_Blend,
  GR32_LowLevel,
  GR32_System;

{ ASM versions }

const
  BlendRegistryPriorityASM = -256;

{ Assembler versions }

const
  bias = $00800080;
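
// How the bias constant is used: the routines below split a color into two
// channel pairs (00 Rr 00 Bb and Aa 00 Gg 00), multiply each pair by an
// 8-bit weight with a single IMUL, then add $80 to both channels at once
// (bias = $00800080) as a rounding term before the divide-by-256 (SHR 8).
// A minimal Pascal sketch of the same trick; the helper name is ours and it
// is illustrative only, not part of this unit's API:
function BiasedScaleSketch(C: TColor32; W: Cardinal): TColor32;
var
  RB, AG: Cardinal;
begin
  RB := C and $00FF00FF;                        // 00 Rr 00 Bb
  AG := (C and $FF00FF00) shr 8;                // 00 Aa 00 Gg
  RB := ((RB * W + bias) and $FF00FF00) shr 8;  // 00 Rr 00 Bb, scaled by W/256
  AG := (AG * W + bias) and $FF00FF00;          // Aa 00 Gg 00, scaled by W/256
  Result := AG or RB;                           // Aa Rr Gg Bb
end;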
function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // blend foreground color (F) onto a background color (B),
  // using the alpha channel value of F
  // Result Z = Fa * Fargb + (1 - Fa) * Bargb
  // Result Z = P + Q
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B

  // Test Fa = 255 ?
        CMP EAX,$FF000000  // Fa = 255 ? => Result = EAX
        JNC @2

  // Test Fa = 0 ?
        TEST EAX,$FF000000  // Fa = 0 ? => Result = EDX
        JZ @1

  // Get weight W = Fa
        MOV ECX,EAX  // ECX <- Fa Fr Fg Fb
        SHR ECX,24  // ECX <- 00 00 00 Fa

        PUSH EBX

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV EBX,EDX  // EBX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL EDX,ECX  // EDX <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,EDX  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb

        POP EBX
        RET

@1:     MOV EAX,EDX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
        MOV RAX,RCX

  // Test Fa = 255 ?
        CMP EAX,$FF000000  // Fa = 255 ? => Result = EAX
        JNC @2

  // Test Fa = 0 ?
        TEST EAX,$FF000000  // Fa = 0 ? => Result = EDX
        JZ @1

  // Get weight W = Fa
        MOV ECX,EAX  // ECX <- Fa Fr Fg Fb
        SHR ECX,24  // ECX <- 00 00 00 Fa

  // P = W * F
        MOV R9D,EAX  // R9D <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND R9D,$FF00FF00  // R9D <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR R9D,8  // R9D <- 00 Fa 00 Fg
        IMUL R9D,ECX  // R9D <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD R9D,bias
        AND R9D,$FF00FF00  // R9D <- Pa 00 Pg 00
        OR EAX,R9D  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV R9D,EDX  // R9D <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND R9D,$FF00FF00  // R9D <- Ba 00 Bg 00
        IMUL EDX,ECX  // EDX <- Qr ** Qb **
        SHR R9D,8  // R9D <- 00 Ba 00 Bg
        IMUL R9D,ECX  // R9D <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD R9D,bias
        AND R9D,$FF00FF00  // R9D <- Qa 00 Qg 00
        OR R9D,EDX  // R9D <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,R9D  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb
        RET

@1:     MOV EAX,EDX
@2:
{$ENDIF}
end;
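
// For reference, what the code above computes per channel, written as plain
// Pascal. Note the asm divides by 256 rather than 255, with $80 as rounding
// bias, which is a slight approximation. A sketch; the helper name is ours:
function BlendChannelSketch(Fc, Bc, Fa: Cardinal): Cardinal;
begin
  // Z = Fa * Fc + (1 - Fa) * Bc, in 0..255 fixed point
  Result := ((Fc * Fa + $80) shr 8) + ((Bc * (Fa xor $FF) + $80) shr 8);
end;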
procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B

  // Test Fa = 0 ?
        TEST EAX,$FF000000  // Fa = 0 ? => do not write
        JZ @2

  // Get weight W = Fa
        MOV ECX,EAX  // ECX <- Fa Fr Fg Fb
        SHR ECX,24  // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
        CMP ECX,$FF
        JZ @1

        PUSH EBX
        PUSH ESI

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias  // add bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias  // add bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

        MOV ESI,[EDX]

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV EBX,ESI  // EBX <- Ba Br Bg Bb
        AND ESI,$00FF00FF  // ESI <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL ESI,ECX  // ESI <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD ESI,bias  // add bias
        AND ESI,$FF00FF00  // ESI <- Qr 00 Qb 00
        SHR ESI,8  // ESI <- 00 Qr 00 Qb
        ADD EBX,bias  // add bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,ESI  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb

        MOV [EDX],EAX
        POP ESI
        POP EBX
        RET

@1:     MOV [EDX],EAX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B

  // Test Fa = 0 ?
        TEST ECX,$FF000000  // Fa = 0 ? => do not write
        JZ @2

        MOV EAX,ECX  // EAX <- Fa Fr Fg Fb

  // Get weight W = Fa
        SHR ECX,24  // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
        CMP ECX,$FF
        JZ @1

  // P = W * F
        MOV R8D,EAX  // R8D <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND R8D,$FF00FF00  // R8D <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR R8D,8  // R8D <- 00 Fa 00 Fg
        IMUL R8D,ECX  // R8D <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD R8D,bias
        AND R8D,$FF00FF00  // R8D <- Pa 00 Pg 00
        OR EAX,R8D  // EAX <- Pa Pr Pg Pb

        MOV R9D,[RDX]

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV R8D,R9D  // R8D <- Ba Br Bg Bb
        AND R9D,$00FF00FF  // R9D <- 00 Br 00 Bb
        AND R8D,$FF00FF00  // R8D <- Ba 00 Bg 00
        IMUL R9D,ECX  // R9D <- Qr ** Qb **
        SHR R8D,8  // R8D <- 00 Ba 00 Bg
        IMUL R8D,ECX  // R8D <- Qa ** Qg **
        ADD R9D,bias
        AND R9D,$FF00FF00  // R9D <- Qr 00 Qb 00
        SHR R9D,8  // R9D <- 00 Qr 00 Qb
        ADD R8D,bias
        AND R8D,$FF00FF00  // R8D <- Qa 00 Qg 00
        OR R8D,R9D  // R8D <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,R8D  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb

        MOV [RDX],EAX
        RET

@1:     MOV [RDX],EAX
@2:
{$ENDIF}
end;
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- Count

        TEST ECX,ECX
        JZ @4

        PUSH EBX
        PUSH ESI
        PUSH EDI

        MOV ESI,EAX  // ESI <- F (the constant foreground color)
        MOV EDI,EDX  // EDI <- B

@1:
  // Test Fa = 0 ?
        MOV EAX,ESI  // EAX <- Fa Fr Fg Fb
        TEST EAX,$FF000000
        JZ @3

        PUSH ECX  // store counter

  // Get weight W = Fa
        MOV ECX,EAX  // ECX <- Fa Fr Fg Fb
        SHR ECX,24  // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
        CMP ECX,$FF
        JZ @2

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias  // add bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias  // add bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

        MOV EDX,[EDI]

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV EBX,EDX  // EBX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL EDX,ECX  // EDX <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD EDX,bias  // add bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EBX,bias  // add bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,EDX  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb

@2:
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb
        MOV [EDI],EAX
        POP ECX  // restore counter

@3:
        ADD EDI,4
        DEC ECX
        JNZ @1

        POP EDI
        POP ESI
        POP EBX
@4:
        RET
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // RDX <- B
  // R8D <- Count

        TEST R8D,R8D
        JZ @4

        PUSH RDI

        MOV R9D,ECX  // R9D <- F (the constant foreground color)
        MOV RDI,RDX  // RDI <- B

@1:
  // Test Fa = 0 ?
        MOV ECX,R9D  // ECX <- Fa Fr Fg Fb
        TEST ECX,$FF000000
        JZ @3

        PUSH R8  // store counter

  // Get weight W = Fa
        MOV R8D,ECX  // R8D <- Fa Fr Fg Fb
        SHR R8D,24  // R8D <- 00 00 00 Fa

  // Test Fa = 255 ?
        CMP R8D,$FF
        JZ @2

  // P = W * F
        MOV EAX,ECX  // EAX <- Fa Fr Fg Fb
        AND ECX,$00FF00FF  // ECX <- 00 Fr 00 Fb
        AND EAX,$FF00FF00  // EAX <- Fa 00 Fg 00
        IMUL ECX,R8D  // ECX <- Pr ** Pb **
        SHR EAX,8  // EAX <- 00 Fa 00 Fg
        IMUL EAX,R8D  // EAX <- Pa ** Pg **
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Pr 00 Pb 00
        SHR ECX,8  // ECX <- 00 Pr 00 Pb
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pa 00 Pg 00
        OR ECX,EAX  // ECX <- Pa Pr Pg Pb

        MOV EDX,[RDI]

  // W = 1 - W
        XOR R8D,$000000FF  // R8D <- 1 - R8D

  // Q = W * B
        MOV EAX,EDX  // EAX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EAX,$FF00FF00  // EAX <- Ba 00 Bg 00
        IMUL EDX,R8D  // EDX <- Qr ** Qb **
        SHR EAX,8  // EAX <- 00 Ba 00 Bg
        IMUL EAX,R8D  // EAX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Qa 00 Qg 00
        OR EAX,EDX  // EAX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD ECX,EAX  // ECX <- Za Zr Zg Zb

@2:
        OR ECX,$FF000000  // ECX <- FF Zr Zg Zb
        MOV [RDI],ECX
        POP R8  // restore counter

@3:
        ADD RDI,4
        DEC R8D
        JNZ @1

        POP RDI
@4:
        RET
{$ENDIF}
end;
function BlendRegEx_ASM(F, B: TColor32; M: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // blend foreground color (F) onto a background color (B),
  // using the alpha channel value of F multiplied by the master alpha (M)
  // no check for M = $FF is done here; for that case Graphics32 uses BlendReg
  // Result Z = Fa * M * Fargb + (1 - Fa * M) * Bargb
  // Result Z = P + Q
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- M

  // Check Fa > 0 ?
        TEST EAX,$FF000000  // Fa = 0 ? => Result := EDX
        JZ @2

        PUSH EBX

  // Get weight W = Fa * M
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        INC ECX  // 255:256 range bias
        SHR EBX,24  // EBX <- 00 00 00 Fa
        IMUL ECX,EBX  // ECX <- 00 00 W **
        SHR ECX,8  // ECX <- 00 00 00 W
        JZ @1  // W = 0 ? => Result := EDX

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV EBX,EDX  // EBX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL EDX,ECX  // EDX <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,EDX  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb
        POP EBX
        RET

@1:
        POP EBX
@2:     MOV EAX,EDX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  // R8D <- M
        MOV EAX,ECX  // EAX <- Fa Fr Fg Fb
        TEST EAX,$FF000000  // Fa = 0 ? => Result := EDX
        JZ @1

  // Get weight W = Fa * M
        INC R8D  // 255:256 range bias
        SHR ECX,24  // ECX <- 00 00 00 Fa
        IMUL R8D,ECX  // R8D <- 00 00 W **
        SHR R8D,8  // R8D <- 00 00 00 W
        JZ @1  // W = 0 ? => Result := EDX

  // P = W * F
        MOV ECX,EAX  // ECX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND ECX,$FF00FF00  // ECX <- Fa 00 Fg 00
        IMUL EAX,R8D  // EAX <- Pr ** Pb **
        SHR ECX,8  // ECX <- 00 Fa 00 Fg
        IMUL ECX,R8D  // ECX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Pa 00 Pg 00
        OR EAX,ECX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        XOR R8D,$000000FF  // R8D <- 1 - R8D

  // Q = W * B
        MOV ECX,EDX  // ECX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND ECX,$FF00FF00  // ECX <- Ba 00 Bg 00
        IMUL EDX,R8D  // EDX <- Qr ** Qb **
        SHR ECX,8  // ECX <- 00 Ba 00 Bg
        IMUL ECX,R8D  // ECX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Qa 00 Qg 00
        OR ECX,EDX  // ECX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,ECX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb
        RET

@1:     MOV EAX,EDX
{$ENDIF}
end;
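
// The combined weight used above, as Pascal: M is first biased into the
// 1..256 range (the "255:256 range bias"), so the weight stays within 0..255
// and W = 0 can serve as an early out. A sketch; the helper name is ours:
function MasterWeightSketch(Fa, M: Cardinal): Cardinal;
begin
  Result := ((M + 1) * Fa) shr 8;  // 0..255; yields 255 when Fa = M = 255
end;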
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B
  // ECX <- M

  // Check Fa > 0 ?
        TEST EAX,$FF000000  // Fa = 0 ? => write nothing
        JZ @2

        PUSH EBX

  // Get weight W = Fa * M
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        SHR EBX,24  // EBX <- 00 00 00 Fa
        INC ECX  // 255:256 range bias for M
        IMUL ECX,EBX  // ECX <- 00 00 W **
        SHR ECX,8  // ECX <- 00 00 00 W
        JZ @1  // W = 0 ? => write nothing

        PUSH ESI

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        MOV ESI,[EDX]
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV EBX,ESI  // EBX <- Ba Br Bg Bb
        AND ESI,$00FF00FF  // ESI <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL ESI,ECX  // ESI <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD ESI,bias
        AND ESI,$FF00FF00  // ESI <- Qr 00 Qb 00
        SHR ESI,8  // ESI <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,ESI  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb
        MOV [EDX],EAX
        POP ESI

@1:     POP EBX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B
  // R8D <- M

  // Check Fa > 0 ?
        TEST ECX,$FF000000  // Fa = 0 ? => write nothing
        JZ @1

  // Get weight W = Fa * M
        MOV EAX,ECX  // EAX <- Fa Fr Fg Fb
        INC R8D  // 255:256 range bias
        SHR EAX,24  // EAX <- 00 00 00 Fa
        IMUL R8D,EAX  // R8D <- 00 00 W **
        SHR R8D,8  // R8D <- 00 00 00 W
        JZ @1  // W = 0 ? => write nothing

  // P = W * F
        MOV EAX,ECX  // EAX <- Fa Fr Fg Fb
        AND ECX,$00FF00FF  // ECX <- 00 Fr 00 Fb
        AND EAX,$FF00FF00  // EAX <- Fa 00 Fg 00
        IMUL ECX,R8D  // ECX <- Pr ** Pb **
        SHR EAX,8  // EAX <- 00 Fa 00 Fg
        IMUL EAX,R8D  // EAX <- Pa ** Pg **
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Pr 00 Pb 00
        SHR ECX,8  // ECX <- 00 Pr 00 Pb
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pa 00 Pg 00
        OR ECX,EAX  // ECX <- Pa Pr Pg Pb

  // W = 1 - W
        MOV R9D,[RDX]
        XOR R8D,$000000FF  // R8D <- 1 - R8D

  // Q = W * B
        MOV EAX,R9D  // EAX <- Ba Br Bg Bb
        AND R9D,$00FF00FF  // R9D <- 00 Br 00 Bb
        AND EAX,$FF00FF00  // EAX <- Ba 00 Bg 00
        IMUL R9D,R8D  // R9D <- Qr ** Qb **
        SHR EAX,8  // EAX <- 00 Ba 00 Bg
        IMUL EAX,R8D  // EAX <- Qa ** Qg **
        ADD R9D,bias
        AND R9D,$FF00FF00  // R9D <- Qr 00 Qb 00
        SHR R9D,8  // R9D <- 00 Qr 00 Qb
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Qa 00 Qg 00
        OR EAX,R9D  // EAX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD ECX,EAX  // ECX <- Za Zr Zg Zb
        OR ECX,$FF000000  // ECX <- FF Zr Zg Zb
        MOV [RDX],ECX
@1:
{$ENDIF}
end;
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // test the counter for zero or negativity
        TEST ECX,ECX
        JLE @4

        PUSH EBX
        PUSH ESI
        PUSH EDI

        MOV ESI,EAX  // ESI <- Src
        MOV EDI,EDX  // EDI <- Dst

  // loop start
@1:     MOV EAX,[ESI]
        TEST EAX,$FF000000
        JZ @3  // complete transparency, proceed to next pixel

        PUSH ECX  // store counter

  // Get weight W = Fa
        MOV ECX,EAX  // ECX <- Fa Fr Fg Fb
        SHR ECX,24  // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
        CMP ECX,$FF
        JZ @2

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        MOV EDX,[EDI]
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * B
        MOV EBX,EDX  // EBX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL EDX,ECX  // EDX <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,EDX  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb

@2:     MOV [EDI],EAX
        POP ECX  // restore counter

@3:     ADD ESI,4
        ADD EDI,4

  // loop end
        DEC ECX
        JNZ @1

        POP EDI
        POP ESI
        POP EBX
@4:
{$ENDIF}

{$IFDEF TARGET_x64}
  // RCX <- Src
  // RDX <- Dst
  // R8D <- Count

  // test the counter for zero or negativity
        TEST R8D,R8D
        JLE @4

        MOV R10,RCX  // R10 <- Src
        MOV R11,RDX  // R11 <- Dst
        MOV ECX,R8D  // ECX <- Count

  // loop start
@1:     MOV EAX,[R10]
        TEST EAX,$FF000000
        JZ @3  // complete transparency, proceed to next pixel

  // Get weight W = Fa
        MOV R9D,EAX  // R9D <- Fa Fr Fg Fb
        SHR R9D,24  // R9D <- 00 00 00 Fa

  // Test Fa = 255 ?
        CMP R9D,$FF
        JZ @2

  // P = W * F
        MOV R8D,EAX  // R8D <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND R8D,$FF00FF00  // R8D <- Fa 00 Fg 00
        IMUL EAX,R9D  // EAX <- Pr ** Pb **
        SHR R8D,8  // R8D <- 00 Fa 00 Fg
        IMUL R8D,R9D  // R8D <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD R8D,bias
        AND R8D,$FF00FF00  // R8D <- Pa 00 Pg 00
        OR EAX,R8D  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        MOV EDX,[R11]
        XOR R9D,$000000FF  // R9D <- 1 - R9D

  // Q = W * B
        MOV R8D,EDX  // R8D <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND R8D,$FF00FF00  // R8D <- Ba 00 Bg 00
        IMUL EDX,R9D  // EDX <- Qr ** Qb **
        SHR R8D,8  // R8D <- 00 Ba 00 Bg
        IMUL R8D,R9D  // R8D <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD R8D,bias
        AND R8D,$FF00FF00  // R8D <- Qa 00 Qg 00
        OR R8D,EDX  // R8D <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,R8D  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb

@2:     MOV [R11],EAX

@3:     ADD R10,4
        ADD R11,4

  // loop end
        DEC ECX
        JNZ @1
@4:
{$ENDIF}
end;
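
// The loop above is the assembler form of this Pascal shape (a sketch of the
// control flow only, reusing BlendMem_ASM from this unit):
procedure BlendLineSketch(Src, Dst: PColor32; Count: Integer);
begin
  while Count > 0 do
  begin
    BlendMem_ASM(Src^, Dst^);  // blend one source pixel onto one dest pixel
    Inc(Src);
    Inc(Dst);
    Dec(Count);
  end;
end;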
procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // test the counter for zero or negativity
        TEST ECX,ECX
        JLE @3

  // test if the source is fully transparent
        TEST EAX,$FF000000
        JZ @3

        PUSH EBX
        PUSH ESI
        PUSH EDI

        MOV ESI,EAX  // ESI <- Src
        MOV EDI,EDX  // EDI <- Dst

  // Get weight W = Fa
        SHR ESI,24  // ESI <- W

  // test if the source is fully opaque
        CMP ESI,$FF
        JZ @4  // fully opaque: just copy the source color

  // P = W * F
        MOV EBX,EAX  // EBX <- Fa Fr Fg Fb
        AND EAX,$00FF00FF  // EAX <- 00 Fr 00 Fb
        AND EBX,$FF00FF00  // EBX <- Fa 00 Fg 00
        IMUL EAX,ESI  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Fa 00 Fg
        IMUL EBX,ESI  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb
        XOR ESI,$000000FF  // ESI <- 1 - Fa

  // loop start
@1:     MOV EDX,[EDI]

  // Q = W * B
        MOV EBX,EDX  // EBX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EBX,$FF00FF00  // EBX <- Ba 00 Bg 00
        IMUL EDX,ESI  // EDX <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ba 00 Bg
        IMUL EBX,ESI  // EBX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,EDX  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EBX,EAX  // EBX <- Za Zr Zg Zb
        OR EBX,$FF000000  // EBX <- FF Zr Zg Zb
        MOV [EDI],EBX

        ADD EDI,4

  // loop end
        DEC ECX
        JNZ @1

        POP EDI
        POP ESI
        POP EBX
@3:     RET

  // copy loop for a fully opaque source
@4:     MOV [EDI],EAX
        ADD EDI,4
        DEC ECX
        JNZ @4

        POP EDI
        POP ESI
        POP EBX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- Src
  // RDX <- Dst
  // R8D <- Count

  // test the counter for zero or negativity
        TEST R8D,R8D
        JLE @2

  // test if the source is fully transparent
        TEST ECX,$FF000000
        JZ @2

        PUSH RDI
        MOV RDI,RDX  // RDI <- Dst
        MOV R9D,ECX  // R9D <- Src

  // Get weight W = Fa
        SHR R9D,24  // R9D <- W

  // Test Fa = 255 ?
        CMP R9D,$FF
        JZ @3  // fully opaque: just copy the source color

  // P = W * F
        MOV EAX,ECX  // EAX <- Fa Fr Fg Fb
        AND ECX,$00FF00FF  // ECX <- 00 Fr 00 Fb
        AND EAX,$FF00FF00  // EAX <- Fa 00 Fg 00
        IMUL ECX,R9D  // ECX <- Pr ** Pb **
        SHR EAX,8  // EAX <- 00 Fa 00 Fg
        IMUL EAX,R9D  // EAX <- Pa ** Pg **
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Pr 00 Pb 00
        SHR ECX,8  // ECX <- 00 Pr 00 Pb
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pa 00 Pg 00
        OR ECX,EAX  // ECX <- Pa Pr Pg Pb
        XOR R9D,$000000FF  // R9D <- 1 - Fa

  // loop start
@1:     MOV EDX,[RDI]

  // Q = W * B
        MOV EAX,EDX  // EAX <- Ba Br Bg Bb
        AND EDX,$00FF00FF  // EDX <- 00 Br 00 Bb
        AND EAX,$FF00FF00  // EAX <- Ba 00 Bg 00
        IMUL EDX,R9D  // EDX <- Qr ** Qb **
        SHR EAX,8  // EAX <- 00 Ba 00 Bg
        IMUL EAX,R9D  // EAX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Qa 00 Qg 00
        OR EAX,EDX  // EAX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,ECX  // EAX <- Za Zr Zg Zb
        OR EAX,$FF000000  // EAX <- FF Zr Zg Zb
        MOV [RDI],EAX

        ADD RDI,4

  // loop end
        DEC R8D
        JNZ @1

        POP RDI
@2:     RET

  // copy loop for a fully opaque source
@3:     MOV [RDI],ECX
        ADD RDI,4
        DEC R8D
        JNZ @3
        POP RDI
{$ENDIF}
end;
{$IFDEF TARGET_x86}
function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  { This is an implementation of the merge formula, as described
    in a paper by Bruce Wallace in 1981. Merging is associative,
    that is, A over (B over C) = (A over B) over C. The formula is:

      Ra = Fa + Ba * (1 - Fa)
      Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra

    where
      Rc is the resultant color,
      Ra is the resultant alpha,
      Fc is the foreground color,
      Fa is the foreground alpha,
      Bc is the background color,
      Ba is the background alpha.
  }
  // EAX <- F
  // EDX <- B

  // if F.A = 0 then
        TEST EAX,$FF000000
        JZ @exit0

  // else if B.A = 255 then
        CMP EDX,$FF000000
        JNC @blend

  // else if F.A = 255 then
        CMP EAX,$FF000000
        JNC @Exit

  // else if B.A = 0 then
        TEST EDX,$FF000000
        JZ @Exit

@4:
        PUSH EBX
        PUSH ESI
        PUSH EDI
        ADD ESP,-$0C
        MOV [ESP+$04],EDX
        MOV [ESP],EAX

  // AH <- F.A
  // DL, CL <- B.A
        SHR EAX,16
        AND EAX,$0000FF00
        SHR EDX,24
        MOV CL,DL
        NOP
        NOP
        NOP

  // EDI <- PF
  // EDX <- PB
  // ESI <- PR

  // PF := @DivTable[F.A];
        LEA EDI,[EAX+DivTable]

  // PB := @DivTable[B.A];
        SHL EDX,$08
        LEA EDX,[EDX+DivTable]

  // Result.A := B.A + F.A - PB[F.A];
        SHR EAX,8
        ADD ECX,EAX
        SUB ECX,[EDX+EAX]
        MOV [ESP+$0B],CL

  // PR := @RcTable[Result.A];
        SHL ECX,$08
        AND ECX,$0000FFFF
        LEA ESI,[ECX+RcTable]

  { Red component }

  // Result.R := PB[B.R];
        XOR EAX,EAX
        MOV AL,[ESP+$06]
        MOV CL,[EDX+EAX]
        MOV [ESP+$0A],CL

  // X := F.R - Result.R;
        MOV AL,[ESP+$02]
        XOR EBX,EBX
        MOV BL,CL
        SUB EAX,EBX

  // if X >= 0 then
        JL @5

  // Result.R := PR[PF[X] + Result.R]
        MOVZX EAX,BYTE PTR [EDI+EAX]
        AND ECX,$000000FF
        ADD EAX,ECX
        MOV AL,[ESI+EAX]
        MOV [ESP+$0A],AL
        JMP @6

@5:
  // Result.R := PR[Result.R - PF[-X]];
        NEG EAX
        MOVZX EAX,BYTE PTR [EDI+EAX]
        XOR ECX,ECX
        MOV CL,[ESP+$0A]
        SUB ECX,EAX
        MOV AL,[ESI+ECX]
        MOV [ESP+$0A],AL

  { Green component }

@6:
  // Result.G := PB[B.G];
        XOR EAX,EAX
        MOV AL,[ESP+$05]
        MOV CL,[EDX+EAX]
        MOV [ESP+$09],CL

  // X := F.G - Result.G;
        MOV AL,[ESP+$01]
        XOR EBX,EBX
        MOV BL,CL
        SUB EAX,EBX

  // if X >= 0 then
        JL @7

  // Result.G := PR[PF[X] + Result.G]
        MOVZX EAX,BYTE PTR [EDI+EAX]
        AND ECX,$000000FF
        ADD EAX,ECX
        MOV AL,[ESI+EAX]
        MOV [ESP+$09],AL
        JMP @8

@7:
  // Result.G := PR[Result.G - PF[-X]];
        NEG EAX
        MOVZX EAX,BYTE PTR [EDI+EAX]
        XOR ECX,ECX
        MOV CL,[ESP+$09]
        SUB ECX,EAX
        MOV AL,[ESI+ECX]
        MOV [ESP+$09],AL

  { Blue component }

@8:
  // Result.B := PB[B.B];
        XOR EAX,EAX
        MOV AL,[ESP+$04]
        MOV CL,[EDX+EAX]
        MOV [ESP+$08],CL

  // X := F.B - Result.B;
        MOV AL,[ESP]
        XOR EDX,EDX
        MOV DL,CL
        SUB EAX,EDX

  // if X >= 0 then
        JL @9

  // Result.B := PR[PF[X] + Result.B]
        MOVZX EAX,BYTE PTR [EDI+EAX]
        XOR EDX,EDX
        MOV DL,CL
        ADD EAX,EDX
        MOV AL,[ESI+EAX]
        MOV [ESP+$08],AL
        JMP @10

@9:
  // Result.B := PR[Result.B - PF[-X]];
        NEG EAX
        MOVZX EAX,BYTE PTR [EDI+EAX]
        XOR EDX,EDX
        MOV DL,CL
        SUB EDX,EAX
        MOV AL,[ESI+EDX]
        MOV [ESP+$08],AL

@10:
  // EAX <- Result
        MOV EAX,[ESP+$08]

  // end;
        ADD ESP,$0C
        POP EDI
        POP ESI
        POP EBX
        RET

@blend:
        CALL DWORD PTR [BlendReg]
        OR EAX,$FF000000
        RET

@exit0:
        MOV EAX,EDX

@Exit:
end;
{$ENDIF}
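
// Wallace's merge equations per channel, as integer Pascal (a sketch with a
// name of our own choosing; the assembler above avoids the divisions by
// using the DivTable/RcTable lookups it references):
function MergeChannelSketch(Fc, Fa, Bc, Ba: Cardinal): Cardinal;
var
  Ra: Cardinal;
begin
  // Ra = Fa + Ba * (1 - Fa), with alpha in 0..255
  Ra := Fa + Ba - (Fa * Ba) div 255;
  if Ra = 0 then
    Result := 0
  else
    // Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra, expanded to
    // Rc = (Fa * Fc + Bc * Ba * (1 - Fa)) / Ra
    Result := (Fa * Fc + (Bc * Ba * (255 - Fa)) div 255) div Ra;
end;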
function CombineReg_ASM(X, Y: TColor32; W: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // combine RGBA channels of colors X and Y with the weight of X given in W
  // Result Z = W * X + (1 - W) * Y (all channels are combined, including alpha)
{$IFDEF TARGET_x86}
  // EAX <- X
  // EDX <- Y
  // ECX <- W

  // W = 0 or $FF ?
        JCXZ @1  // W = 0 ? => Result := EDX
        CMP ECX,$FF  // W = $FF ? => Result := EAX
        JE @2

        PUSH EBX

  // P = W * X
        MOV EBX,EAX  // EBX <- Xa Xr Xg Xb
        AND EAX,$00FF00FF  // EAX <- 00 Xr 00 Xb
        AND EBX,$FF00FF00  // EBX <- Xa 00 Xg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Xa 00 Xg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        XOR ECX,$000000FF  // ECX <- 1 - ECX
        MOV EBX,EDX  // EBX <- Ya Yr Yg Yb

  // Q = W * Y
        AND EDX,$00FF00FF  // EDX <- 00 Yr 00 Yb
        AND EBX,$FF00FF00  // EBX <- Ya 00 Yg 00
        IMUL EDX,ECX  // EDX <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ya 00 Yg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,EDX  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb

        POP EBX
        RET

@1:     MOV EAX,EDX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- X
  // EDX <- Y
  // R8D <- W

  // W = 0 or $FF ?
        TEST R8D,R8D
        JZ @1  // W = 0 ? => Result := EDX
        MOV EAX,ECX  // EAX <- Xa Xr Xg Xb
        CMP R8B,$FF  // W = $FF ? => Result := EAX
        JE @2

  // P = W * X
        AND EAX,$00FF00FF  // EAX <- 00 Xr 00 Xb
        AND ECX,$FF00FF00  // ECX <- Xa 00 Xg 00
        IMUL EAX,R8D  // EAX <- Pr ** Pb **
        SHR ECX,8  // ECX <- 00 Xa 00 Xg
        IMUL ECX,R8D  // ECX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Pa 00 Pg 00
        OR EAX,ECX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        XOR R8D,$000000FF  // R8D <- 1 - R8D
        MOV ECX,EDX  // ECX <- Ya Yr Yg Yb

  // Q = W * Y
        AND EDX,$00FF00FF  // EDX <- 00 Yr 00 Yb
        AND ECX,$FF00FF00  // ECX <- Ya 00 Yg 00
        IMUL EDX,R8D  // EDX <- Qr ** Qb **
        SHR ECX,8  // ECX <- 00 Ya 00 Yg
        IMUL ECX,R8D  // ECX <- Qa ** Qg **
        ADD EDX,bias
        AND EDX,$FF00FF00  // EDX <- Qr 00 Qb 00
        SHR EDX,8  // EDX <- 00 Qr 00 Qb
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Qa 00 Qg 00
        OR ECX,EDX  // ECX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,ECX  // EAX <- Za Zr Zg Zb
        RET

@1:     MOV EAX,EDX
@2:
{$ENDIF}
end;
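
// Unlike the blend routines, CombineReg interpolates with an external weight
// and treats the alpha channel like any other channel (no forcing to $FF).
// A sketch of the full-color form, reusing the helper sketched earlier; as
// in the asm, per-byte sums are assumed not to overflow:
function CombineColorSketch(X, Y: TColor32; W: Cardinal): TColor32;
begin
  Result := BiasedScaleSketch(X, W) + BiasedScaleSketch(Y, W xor $FF);
end;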
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- X
  // [EDX] <- Y
  // ECX <- W

  // Check W
        JCXZ @1  // W = 0 ? => write nothing
        CMP ECX,$FF  // W = 255 ? => write X
{$IFDEF FPC}
        DB $74,$76  // JZ @2, hard-coded (works around a problem with FPC 2.2.2 and below)
{$ELSE}
        JZ @2
{$ENDIF}

        PUSH EBX
        PUSH ESI

  // P = W * X
        MOV EBX,EAX  // EBX <- Xa Xr Xg Xb
        AND EAX,$00FF00FF  // EAX <- 00 Xr 00 Xb
        AND EBX,$FF00FF00  // EBX <- Xa 00 Xg 00
        IMUL EAX,ECX  // EAX <- Pr ** Pb **
        SHR EBX,8  // EBX <- 00 Xa 00 Xg
        IMUL EBX,ECX  // EBX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Pa 00 Pg 00
        OR EAX,EBX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        MOV ESI,[EDX]
        XOR ECX,$000000FF  // ECX <- 1 - ECX

  // Q = W * Y
        MOV EBX,ESI  // EBX <- Ya Yr Yg Yb
        AND ESI,$00FF00FF  // ESI <- 00 Yr 00 Yb
        AND EBX,$FF00FF00  // EBX <- Ya 00 Yg 00
        IMUL ESI,ECX  // ESI <- Qr ** Qb **
        SHR EBX,8  // EBX <- 00 Ya 00 Yg
        IMUL EBX,ECX  // EBX <- Qa ** Qg **
        ADD ESI,bias
        AND ESI,$FF00FF00  // ESI <- Qr 00 Qb 00
        SHR ESI,8  // ESI <- 00 Qr 00 Qb
        ADD EBX,bias
        AND EBX,$FF00FF00  // EBX <- Qa 00 Qg 00
        OR EBX,ESI  // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,EBX  // EAX <- Za Zr Zg Zb
        MOV [EDX],EAX

        POP ESI
        POP EBX
@1:     RET

@2:     MOV [EDX],EAX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- X
  // [RDX] <- Y
  // R8D <- W

  // Check W
        TEST R8D,R8D  // set flags for R8
        JZ @2  // W = 0 ? => write nothing
        MOV EAX,ECX  // EAX <- Xa Xr Xg Xb
        CMP R8B,$FF  // W = 255 ? => write X
        JZ @1

  // P = W * X
        AND EAX,$00FF00FF  // EAX <- 00 Xr 00 Xb
        AND ECX,$FF00FF00  // ECX <- Xa 00 Xg 00
        IMUL EAX,R8D  // EAX <- Pr ** Pb **
        SHR ECX,8  // ECX <- 00 Xa 00 Xg
        IMUL ECX,R8D  // ECX <- Pa ** Pg **
        ADD EAX,bias
        AND EAX,$FF00FF00  // EAX <- Pr 00 Pb 00
        SHR EAX,8  // EAX <- 00 Pr 00 Pb
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Pa 00 Pg 00
        OR EAX,ECX  // EAX <- Pa Pr Pg Pb

  // W = 1 - W
        MOV R9D,[RDX]
        XOR R8D,$000000FF  // R8D <- 1 - R8D

  // Q = W * Y
        MOV ECX,R9D  // ECX <- Ya Yr Yg Yb
        AND R9D,$00FF00FF  // R9D <- 00 Yr 00 Yb
        AND ECX,$FF00FF00  // ECX <- Ya 00 Yg 00
        IMUL R9D,R8D  // R9D <- Qr ** Qb **
        SHR ECX,8  // ECX <- 00 Ya 00 Yg
        IMUL ECX,R8D  // ECX <- Qa ** Qg **
        ADD R9D,bias
        AND R9D,$FF00FF00  // R9D <- Qr 00 Qb 00
        SHR R9D,8  // R9D <- 00 Qr 00 Qb
        ADD ECX,bias
        AND ECX,$FF00FF00  // ECX <- Qa 00 Qg 00
        OR ECX,R9D  // ECX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD EAX,ECX  // EAX <- Za Zr Zg Zb

@1:     MOV [RDX],EAX
@2:
{$ENDIF}
end;
procedure EMMS_ASM; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // The routines in this unit use only general-purpose registers, so there
  // is no MMX state to clear; this is deliberately a no-op.
end;

procedure RegisterBindingFunctions;
begin
{$IFNDEF PUREPASCAL}
  BlendRegistry.Add(FID_EMMS, @EMMS_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_COMBINEREG, @CombineReg_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_COMBINEMEM, @CombineMem_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDREG, @BlendReg_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDMEM, @BlendMem_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDMEMS, @BlendMems_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDLINE, @BlendLine_ASM, [], 0, BlendRegistryPriorityASM);
  BlendRegistry.Add(FID_BLENDLINE1, @BlendLine1_ASM, [], 0, BlendRegistryPriorityASM);
{$IFNDEF TARGET_x64}
  BlendRegistry.Add(FID_MERGEREG, @MergeReg_ASM, [], 0, BlendRegistryPriorityASM);
{$ENDIF}
{$ENDIF}
end;

initialization
  RegisterBindingFunctions;

end.