unit GR32.Blend.Assembler;

(* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1 or LGPL 2.1 with linking exception
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * Free Pascal modified version of the GNU Lesser General Public License
 * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
 * of this license are applicable instead of those above.
 * Please see the file LICENSE.txt for additional information concerning this
 * license.
 *
 * The Original Code is Graphics32
 *
 * The Initial Developer of the Original Code is
 * Alex A. Denisov
 *
 * Portions created by the Initial Developer are Copyright (C) 2000-2009
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Christian-W. Budde
 *     - 2019/04/01 - Refactoring
 *
 * ***** END LICENSE BLOCK ***** *)

interface

{$include GR32.inc}

uses
  GR32;
//------------------------------------------------------------------------------
//
// Assembler blend implementations
//
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Blend
//------------------------------------------------------------------------------
function BlendReg_ASM(F, B: TColor32): TColor32;
procedure BlendMem_ASM(F: TColor32; var B: TColor32);
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer);
function BlendRegEx_ASM(F, B: TColor32; M: Cardinal): TColor32;
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: Cardinal);
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer);

//------------------------------------------------------------------------------
// Merge
//------------------------------------------------------------------------------
{$IFDEF TARGET_x86}
function MergeReg_ASM(F, B: TColor32): TColor32;
{$ENDIF}

//------------------------------------------------------------------------------
// Combine
//------------------------------------------------------------------------------
function CombineReg_ASM(X, Y: TColor32; W: Cardinal): TColor32;
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: Cardinal);

//------------------------------------------------------------------------------
//
// Bindings
//
//------------------------------------------------------------------------------
const
  BlendRegistryPriorityASM = -256;

//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
implementation

uses
  GR32_Blend,
  GR32_Bindings,
  GR32_LowLevel;

const
  bias = $00800080;
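
(*
Why bias = $00800080: each 32-bit multiply below operates on two color
channels at once, held in the low bytes of the two 16-bit lanes of a
register. After "channel * weight" each lane holds a 16-bit product;
adding $0080 to both lanes makes the subsequent SHR 8 round to nearest
instead of truncating. A minimal pure-Pascal sketch of the trick
(illustrative name, not part of this unit):

function ScaleRedBlue(Color: TColor32; W: Cardinal): Cardinal;
begin
  // 00 Fr 00 Fb -> 00 Pr 00 Pb, both channels scaled by W/256 and rounded
  Result := (((Color and $00FF00FF) * W + bias) and $FF00FF00) shr 8;
end;
*)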
//------------------------------------------------------------------------------
//
// Blend
//
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// BlendReg
//------------------------------------------------------------------------------
function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blend foreground color (F) onto background color (B),
  // using the alpha channel value of F:
  //
  //   Result Z = Fa * Fargb + (1 - Fa) * Bargb
  //   Result Z = P + Q

{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B

  // Test Fa = 255 ?
  CMP EAX,$FF000000       // Fa = 255 ? => Result = EAX
  JNC @2

  // Test Fa = 0 ?
  TEST EAX,$FF000000      // Fa = 0 ? => Result = EDX
  JZ @1

  // Get weight W = Fa
  MOV ECX,EAX             // ECX <- Fa Fr Fg Fb
  SHR ECX,24              // ECX <- 00 00 00 Fa

  PUSH EBX

  // P = W * F
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND EBX,$FF00FF00       // EBX <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Fa 00 Fg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV EBX,EDX             // EBX <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND EBX,$FF00FF00       // EBX <- Ba 00 Bg 00
  IMUL EDX,ECX            // EDX <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ba 00 Bg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,EDX              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb

  POP EBX
  RET

@1: MOV EAX,EDX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  MOV RAX,RCX             // EAX <- F

  // Test Fa = 255 ?
  CMP EAX,$FF000000       // Fa = 255 ? => Result = EAX
  JNC @2

  // Test Fa = 0 ?
  TEST EAX,$FF000000      // Fa = 0 ? => Result = EDX
  JZ @1

  // Get weight W = Fa
  MOV ECX,EAX             // ECX <- Fa Fr Fg Fb
  SHR ECX,24              // ECX <- 00 00 00 Fa

  // P = W * F
  MOV R9D,EAX             // R9D <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND R9D,$FF00FF00       // R9D <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR R9D,8               // R9D <- 00 Fa 00 Fg
  IMUL R9D,ECX            // R9D <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD R9D,bias
  AND R9D,$FF00FF00       // R9D <- Pa 00 Pg 00
  OR EAX,R9D              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV R9D,EDX             // R9D <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND R9D,$FF00FF00       // R9D <- Ba 00 Bg 00
  IMUL EDX,ECX            // EDX <- Qr ** Qb **
  SHR R9D,8               // R9D <- 00 Ba 00 Bg
  IMUL R9D,ECX            // R9D <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD R9D,bias
  AND R9D,$FF00FF00       // R9D <- Qa 00 Qg 00
  OR R9D,EDX              // R9D <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,R9D             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb
  RET

@1: MOV EAX,EDX
@2:
{$ENDIF}
end;
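
(*
For reference, a pure-Pascal sketch of the formula implemented above
("BlendReg_Reference" is an illustrative name, not part of this unit;
it mirrors the register comments one to one):

function BlendReg_Reference(F, B: TColor32): TColor32;
var
  W, P, Q: Cardinal;
begin
  W := F shr 24;                 // W = Fa
  if W = $FF then
    Result := F
  else
  if W = 0 then
    Result := B
  else
  begin
    // P = W * F: red/blue and alpha/green lane pairs multiplied in parallel
    P := ((((F and $00FF00FF) * W + bias) and $FF00FF00) shr 8)
      or ((((F and $FF00FF00) shr 8) * W + bias) and $FF00FF00);
    W := W xor $FF;              // W = 1 - W (in the 0..255 range)
    // Q = W * B
    Q := ((((B and $00FF00FF) * W + bias) and $FF00FF00) shr 8)
      or ((((B and $FF00FF00) shr 8) * W + bias) and $FF00FF00);
    // Z = P + Q, alpha forced to opaque
    Result := (P + Q) or $FF000000;
  end;
end;
*)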
//------------------------------------------------------------------------------
// BlendMem
//------------------------------------------------------------------------------
procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B

  // Test Fa = 0 ?
  TEST EAX,$FF000000      // Fa = 0 ? => do not write
  JZ @2

  // Get weight W = Fa
  MOV ECX,EAX             // ECX <- Fa Fr Fg Fb
  SHR ECX,24              // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
  CMP ECX,$FF
  JZ @1

  PUSH EBX
  PUSH ESI

  // P = W * F
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND EBX,$FF00FF00       // EBX <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Fa 00 Fg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias            // add bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias            // add bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  MOV ESI,[EDX]

  // W = 1 - W
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV EBX,ESI             // EBX <- Ba Br Bg Bb
  AND ESI,$00FF00FF       // ESI <- 00 Br 00 Bb
  AND EBX,$FF00FF00       // EBX <- Ba 00 Bg 00
  IMUL ESI,ECX            // ESI <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ba 00 Bg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD ESI,bias            // add bias
  AND ESI,$FF00FF00       // ESI <- Qr 00 Qb 00
  SHR ESI,8               // ESI <- 00 Qr 00 Qb
  ADD EBX,bias            // add bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,ESI              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb
  MOV [EDX],EAX

  POP ESI
  POP EBX
  RET

@1: MOV [EDX],EAX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B

  // Test Fa = 0 ?
  TEST ECX,$FF000000      // Fa = 0 ? => do not write
  JZ @2

  MOV EAX,ECX             // EAX <- Fa Fr Fg Fb

  // Get weight W = Fa
  SHR ECX,24              // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
  CMP ECX,$FF
  JZ @1

  // P = W * F
  MOV R8D,EAX             // R8D <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND R8D,$FF00FF00       // R8D <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR R8D,8               // R8D <- 00 Fa 00 Fg
  IMUL R8D,ECX            // R8D <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD R8D,bias
  AND R8D,$FF00FF00       // R8D <- Pa 00 Pg 00
  OR EAX,R8D              // EAX <- Pa Pr Pg Pb

  MOV R9D,[RDX]

  // W = 1 - W
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV R8D,R9D             // R8D <- Ba Br Bg Bb
  AND R9D,$00FF00FF       // R9D <- 00 Br 00 Bb
  AND R8D,$FF00FF00       // R8D <- Ba 00 Bg 00
  IMUL R9D,ECX            // R9D <- Qr ** Qb **
  SHR R8D,8               // R8D <- 00 Ba 00 Bg
  IMUL R8D,ECX            // R8D <- Qa ** Qg **
  ADD R9D,bias
  AND R9D,$FF00FF00       // R9D <- Qr 00 Qb 00
  SHR R9D,8               // R9D <- 00 Qr 00 Qb
  ADD R8D,bias
  AND R8D,$FF00FF00       // R8D <- Qa 00 Qg 00
  OR R8D,R9D              // R8D <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,R8D             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb
  MOV [RDX],EAX
  RET

@1: MOV [RDX],EAX
@2:
{$ENDIF}
end;
//------------------------------------------------------------------------------
// BlendRegEx
//------------------------------------------------------------------------------
function BlendRegEx_ASM(F, B: TColor32; M: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blend foreground color (F) onto background color (B),
  // using the alpha channel value of F multiplied by the master alpha (M).
  // There is no check for M = $FF; for that case Graphics32 uses BlendReg
  // instead:
  //
  //   Result Z = Fa * M * Fargb + (1 - Fa * M) * Bargb
  //   Result Z = P + Q

{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- M

  // Check Fa > 0 ?
  TEST EAX,$FF000000      // Fa = 0 ? => Result := EDX
  JZ @2

  PUSH EBX

  // Get weight W = Fa * M
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  INC ECX                 // 255:256 range bias
  SHR EBX,24              // EBX <- 00 00 00 Fa
  IMUL ECX,EBX            // ECX <- 00 00 W **
  SHR ECX,8               // ECX <- 00 00 00 W
  JZ @1                   // W = 0 ? => Result := EDX

  // P = W * F
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND EBX,$FF00FF00       // EBX <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Fa 00 Fg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV EBX,EDX             // EBX <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND EBX,$FF00FF00       // EBX <- Ba 00 Bg 00
  IMUL EDX,ECX            // EDX <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ba 00 Bg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,EDX              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb

  POP EBX
  RET

@1:
  POP EBX
@2: MOV EAX,EDX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  // R8D <- M

  MOV EAX,ECX             // EAX <- Fa Fr Fg Fb
  TEST EAX,$FF000000      // Fa = 0 ? => Result := EDX
  JZ @1

  // Get weight W = Fa * M
  INC R8D                 // 255:256 range bias
  SHR ECX,24              // ECX <- 00 00 00 Fa
  IMUL R8D,ECX            // R8D <- 00 00 W **
  SHR R8D,8               // R8D <- 00 00 00 W
  JZ @1                   // W = 0 ? => Result := EDX

  // P = W * F
  MOV ECX,EAX             // ECX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND ECX,$FF00FF00       // ECX <- Fa 00 Fg 00
  IMUL EAX,R8D            // EAX <- Pr ** Pb **
  SHR ECX,8               // ECX <- 00 Fa 00 Fg
  IMUL ECX,R8D            // ECX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Pa 00 Pg 00
  OR EAX,ECX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  XOR R8D,$000000FF       // R8D <- 1 - R8D

  // Q = W * B
  MOV ECX,EDX             // ECX <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND ECX,$FF00FF00       // ECX <- Ba 00 Bg 00
  IMUL EDX,R8D            // EDX <- Qr ** Qb **
  SHR ECX,8               // ECX <- 00 Ba 00 Bg
  IMUL ECX,R8D            // ECX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Qa 00 Qg 00
  OR ECX,EDX              // ECX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,ECX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb
  RET

@1: MOV EAX,EDX
{$ENDIF}
end;
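
(*
A sketch of the weight computation used above: the master alpha M (0..255)
is raised into the 1..256 range ("INC ECX" / "INC R8D") so that the product
with Fa can be reduced with a plain SHR 8 instead of a division by 255
(illustrative helper, not part of this unit):

function CombinedWeight(Fa, M: Cardinal): Cardinal;
begin
  Result := ((M + 1) * Fa) shr 8;   // 255:256 range bias; yields 0..255
end;

With W = CombinedWeight(Fa, M) in hand, the blend proceeds exactly as in
BlendReg; W = 0 short-circuits to Result := B.
*)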
//------------------------------------------------------------------------------
// BlendMemEx
//------------------------------------------------------------------------------
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B
  // ECX <- M

  // Check Fa > 0 ?
  TEST EAX,$FF000000      // Fa = 0 ? => write nothing
  JZ @2

  PUSH EBX

  // Get weight W = Fa * M
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  SHR EBX,24              // EBX <- 00 00 00 Fa
  INC ECX                 // 255:256 range bias for M
  IMUL ECX,EBX            // ECX <- 00 00 W **
  SHR ECX,8               // ECX <- 00 00 00 W
  JZ @1                   // W = 0 ? => write nothing

  PUSH ESI

  // P = W * F
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND EBX,$FF00FF00       // EBX <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Fa 00 Fg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  MOV ESI,[EDX]
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV EBX,ESI             // EBX <- Ba Br Bg Bb
  AND ESI,$00FF00FF       // ESI <- 00 Br 00 Bb
  AND EBX,$FF00FF00       // EBX <- Ba 00 Bg 00
  IMUL ESI,ECX            // ESI <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ba 00 Bg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD ESI,bias
  AND ESI,$FF00FF00       // ESI <- Qr 00 Qb 00
  SHR ESI,8               // ESI <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,ESI              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb
  MOV [EDX],EAX

  POP ESI
@1: POP EBX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B
  // R8D <- M

  // NOTE: This x64 path is known to be broken and is therefore not
  // registered in the bindings below. The stray ADD R8D,bias in the
  // weight computation and the missing OR EAX,$FF000000 on the result
  // are the likely culprits.

  // Check Fa > 0 ?
  TEST ECX,$FF000000      // Fa = 0 ? => write nothing
  JZ @1

  // Get weight W = Fa * M
  MOV EAX,ECX             // EAX <- Fa Fr Fg Fb
  INC R8D                 // 255:256 range bias
  SHR EAX,24              // EAX <- 00 00 00 Fa
  IMUL R8D,EAX            // R8D <- 00 00 W **
  ADD R8D,bias            // (suspect: bias does not belong in the weight)
  SHR R8D,8               // R8D <- 00 00 00 W
  JZ @1                   // W = 0 ? => write nothing

  // P = W * F
  MOV EAX,ECX             // EAX <- Fa Fr Fg Fb
  AND ECX,$00FF00FF       // ECX <- 00 Fr 00 Fb
  AND EAX,$FF00FF00       // EAX <- Fa 00 Fg 00
  IMUL ECX,R8D            // ECX <- Pr ** Pb **
  SHR EAX,8               // EAX <- 00 Fa 00 Fg
  IMUL EAX,R8D            // EAX <- Pa ** Pg **
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Pr 00 Pb 00
  SHR ECX,8               // ECX <- 00 Pr 00 Pb
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pa 00 Pg 00
  OR ECX,EAX              // ECX <- Pa Pr Pg Pb

  // W = 1 - W
  MOV R9D,[RDX]
  XOR R8D,$000000FF       // R8D <- 1 - R8D

  // Q = W * B
  MOV EAX,R9D             // EAX <- Ba Br Bg Bb
  AND R9D,$00FF00FF       // R9D <- 00 Br 00 Bb
  AND EAX,$FF00FF00       // EAX <- Ba 00 Bg 00
  IMUL R9D,R8D            // R9D <- Qr ** Qb **
  SHR EAX,8               // EAX <- 00 Ba 00 Bg
  IMUL EAX,R8D            // EAX <- Qa ** Qg **
  ADD R9D,bias
  AND R9D,$FF00FF00       // R9D <- Qr 00 Qb 00
  SHR R9D,8               // R9D <- 00 Qr 00 Qb
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Qa 00 Qg 00
  OR EAX,R9D              // EAX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD ECX,EAX             // ECX <- Za Zr Zg Zb
  MOV [RDX],ECX
@1:
{$ENDIF}
end;
//------------------------------------------------------------------------------
// BlendLine
//------------------------------------------------------------------------------
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // Test the counter for zero or negative values
  TEST ECX,ECX
  JLE @4

  PUSH EBX
  PUSH ESI
  PUSH EDI

  MOV ESI,EAX             // ESI <- Src
  MOV EDI,EDX             // EDI <- Dst

  // loop start
@1: MOV EAX,[ESI]
  TEST EAX,$FF000000
  JZ @3                   // completely transparent; proceed to next pixel

  PUSH ECX                // store counter

  // Get weight W = Fa
  MOV ECX,EAX             // ECX <- Fa Fr Fg Fb
  SHR ECX,24              // ECX <- 00 00 00 Fa

  // Test Fa = 255 ?
  CMP ECX,$FF
  JZ @2

  // P = W * F
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND EBX,$FF00FF00       // EBX <- Fa 00 Fg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Fa 00 Fg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  MOV EDX,[EDI]
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * B
  MOV EBX,EDX             // EBX <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND EBX,$FF00FF00       // EBX <- Ba 00 Bg 00
  IMUL EDX,ECX            // EDX <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ba 00 Bg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,EDX              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb

@2: MOV [EDI],EAX
  POP ECX                 // restore counter

@3: ADD ESI,4
  ADD EDI,4

  // loop end
  DEC ECX
  JNZ @1

  POP EDI
  POP ESI
  POP EBX
@4:
{$ENDIF}

{$IFDEF TARGET_x64}
  // RCX <- Src
  // RDX <- Dst
  // R8 <- Count

  // Test the counter for zero or negative values
  TEST R8D,R8D
  JLE @4

  MOV R10,RCX             // R10 <- Src
  MOV R11,RDX             // R11 <- Dst
  MOV ECX,R8D             // ECX <- Count

  // loop start
@1: MOV EAX,[R10]
  TEST EAX,$FF000000
  JZ @3                   // completely transparent; proceed to next pixel

  // Get weight W = Fa
  MOV R9D,EAX             // R9D <- Fa Fr Fg Fb
  SHR R9D,24              // R9D <- 00 00 00 Fa

  // Test Fa = 255 ?
  CMP R9D,$FF
  JZ @2

  // P = W * F
  MOV R8D,EAX             // R8D <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND R8D,$FF00FF00       // R8D <- Fa 00 Fg 00
  IMUL EAX,R9D            // EAX <- Pr ** Pb **
  SHR R8D,8               // R8D <- 00 Fa 00 Fg
  IMUL R8D,R9D            // R8D <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD R8D,bias
  AND R8D,$FF00FF00       // R8D <- Pa 00 Pg 00
  OR EAX,R8D              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  MOV EDX,[R11]
  XOR R9D,$000000FF       // R9D <- 1 - R9D

  // Q = W * B
  MOV R8D,EDX             // R8D <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND R8D,$FF00FF00       // R8D <- Ba 00 Bg 00
  IMUL EDX,R9D            // EDX <- Qr ** Qb **
  SHR R8D,8               // R8D <- 00 Ba 00 Bg
  IMUL R8D,R9D            // R8D <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD R8D,bias
  AND R8D,$FF00FF00       // R8D <- Qa 00 Qg 00
  OR R8D,EDX              // R8D <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,R8D             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb

@2: MOV [R11],EAX

@3: ADD R10,4
  ADD R11,4

  // loop end
  DEC ECX
  JNZ @1
@4:
{$ENDIF}
end;
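
(*
Functionally, the loop above is equivalent to this pure-Pascal sketch
("BlendLine_Reference" is an illustrative name, not part of this unit);
BlendMem_ASM already handles the Fa = 0 (skip) and Fa = 255 (copy)
shortcuts per pixel:

procedure BlendLine_Reference(Src, Dst: PColor32; Count: Integer);
begin
  while Count > 0 do
  begin
    BlendMem_ASM(Src^, Dst^);
    Inc(Src);
    Inc(Dst);
    Dec(Count);
  end;
end;
*)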
//------------------------------------------------------------------------------
// BlendMems
//------------------------------------------------------------------------------
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F (source color)
  // EDX <- B (destination pointer)
  // ECX <- Count

  // Test the counter for zero or negative values
  TEST ECX,ECX
  JLE @Done

  // Test if the source is fully transparent
  TEST EAX,$FF000000
  JZ @Done

  PUSH EBX
  PUSH ESI
  PUSH EDI

  MOV ESI,EAX             // ESI <- F
  MOV EDI,EDX             // EDI <- B

  // Get weight W = Fa
  SHR ESI,24              // ESI <- W

  // Test if the source is fully opaque
  CMP ESI,$FF
  JZ @CopySource

  // P = W * F (loop invariant; computed once)
  MOV EBX,EAX             // EBX <- Fa Fr Fg Fb
  AND EAX,$00FF00FF       // EAX <- 00 Fr 00 Fb
  AND EBX,$FF00FF00       // EBX <- Fa 00 Fg 00
  IMUL EAX,ESI            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Fa 00 Fg
  IMUL EBX,ESI            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  XOR ESI,$000000FF       // ESI <- 1 - Fa

  // loop start
@BlendPixelLoop:
  MOV EDX,[EDI]           // EDX <- Dest^
  MOV EBX,EDX             // EBX <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND EBX,$FF00FF00       // EBX <- Ba 00 Bg 00
  IMUL EDX,ESI            // EDX <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ba 00 Bg
  IMUL EBX,ESI            // EBX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,EDX              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EBX,EAX             // EBX <- Za Zr Zg Zb
  OR EBX,$FF000000        // EBX <- FF Zr Zg Zb

  MOV [EDI],EBX           // Dest^ <- EBX
  ADD EDI,4               // Inc(Dest)

  DEC ECX                 // Dec(Count)
  JNZ @BlendPixelLoop

  POP EDI
  POP ESI
  POP EBX

@Done:
  RET

  // Fully opaque: just store the source color
@CopySource:
  MOV [EDI],EAX           // Dest^ <- F
  ADD EDI,4               // Inc(Dest)
  DEC ECX                 // Dec(Count)
  JNZ @CopySource

  POP EDI
  POP ESI
  POP EBX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F (source color)
  // RDX <- B (destination pointer)
  // R8 <- Count

  // Test the counter for zero or negative values
  TEST R8D,R8D
  JLE @Done

  // Test if the source is fully transparent
  TEST ECX,$FF000000
  JZ @Done

  PUSH RDI
  MOV RDI,RDX             // RDI <- B
  MOV R9D,ECX             // R9D <- F

  // Get weight W = Fa
  SHR R9D,24              // R9D <- W

  // Test Fa = 255 ?
  CMP R9D,$FF
  JZ @CopySource          // fully opaque => copy source

  // P = W * F (loop invariant; computed once)
  MOV EAX,ECX             // EAX <- Fa Fr Fg Fb
  AND ECX,$00FF00FF       // ECX <- 00 Fr 00 Fb
  AND EAX,$FF00FF00       // EAX <- Fa 00 Fg 00
  IMUL ECX,R9D            // ECX <- Pr ** Pb **
  SHR EAX,8               // EAX <- 00 Fa 00 Fg
  IMUL EAX,R9D            // EAX <- Pa ** Pg **
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Pr 00 Pb 00
  SHR ECX,8               // ECX <- 00 Pr 00 Pb
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pa 00 Pg 00
  OR ECX,EAX              // ECX <- Pa Pr Pg Pb

  XOR R9D,$000000FF       // R9D <- 1 - Fa

  // loop start
@BlendPixelLoop:
  MOV EDX,[RDI]
  MOV EAX,EDX             // EAX <- Ba Br Bg Bb
  AND EDX,$00FF00FF       // EDX <- 00 Br 00 Bb
  AND EAX,$FF00FF00       // EAX <- Ba 00 Bg 00
  IMUL EDX,R9D            // EDX <- Qr ** Qb **
  SHR EAX,8               // EAX <- 00 Ba 00 Bg
  IMUL EAX,R9D            // EAX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Qa 00 Qg 00
  OR EAX,EDX              // EAX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,ECX             // EAX <- Za Zr Zg Zb
  OR EAX,$FF000000        // EAX <- FF Zr Zg Zb

  MOV [RDI],EAX
  ADD RDI,4

  // loop end
  DEC R8D
  JNZ @BlendPixelLoop

  POP RDI

@Done:
  RET

  // Fully opaque: just copy the source color
@CopySource:
  MOV [RDI],ECX
  ADD RDI,4
  DEC R8D
  JNZ @CopySource

  POP RDI
{$ENDIF}
end;
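
(*
Usage sketch: unlike BlendLine, BlendMems blends a single color F onto a run
of Count destination pixels, which is why P = W * F is hoisted out of the
loop above. A hypothetical call, blending one color onto a whole scanline
(Bitmap and Y are assumptions, not defined here):

  BlendMems_ASM(clTrGreen32, Bitmap.ScanLine[Y], Bitmap.Width);

No EMMS is needed afterwards; the routine uses general-purpose registers only.
*)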
//------------------------------------------------------------------------------
//
// Merge
//
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// MergeReg
//------------------------------------------------------------------------------
{$IFDEF TARGET_x86}
function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  { This is an implementation of the merge formula, as described
    in a paper by Bruce Wallace in 1981. Merging is associative,
    that is, A over (B over C) = (A over B) over C. The formula is:

      Ra = Fa + Ba * (1 - Fa)
      Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra

    where

      Rc is the resultant color,
      Ra is the resultant alpha,
      Fc is the foreground color,
      Fa is the foreground alpha,
      Bc is the background color,
      Ba is the background alpha.
  }

  // EAX <- F
  // EDX <- B

  // if F.A = 0 then
  TEST EAX,$FF000000
  JZ @exit0

  // else if B.A = 255 then
  CMP EDX,$FF000000
  JNC @blend

  // else if F.A = 255 then
  CMP EAX,$FF000000
  JNC @Exit

  // else if B.A = 0 then
  TEST EDX,$FF000000
  JZ @Exit

@4:
  PUSH EBX
  PUSH ESI
  PUSH EDI
  ADD ESP,-$0C
  MOV [ESP+$04],EDX
  MOV [ESP],EAX

  // AH <- F.A
  // DL, CL <- B.A
  SHR EAX,16
  AND EAX,$0000FF00
  SHR EDX,24
  MOV CL,DL
  NOP
  NOP
  NOP

  // EDI <- PF
  // EDX <- PB
  // ESI <- PR

  // PF := @MulDiv255Table[F.A];
  LEA EDI,[EAX+MulDiv255Table]

  // PB := @MulDiv255Table[B.A];
  SHL EDX,$08
  LEA EDX,[EDX+MulDiv255Table]

  // Result.A := B.A + F.A - PB[F.A];
  SHR EAX,8
  ADD ECX,EAX
  SUB ECX,[EDX+EAX]
  MOV [ESP+$0B],CL

  // PR := @DivMul255Table[Result.A];
  SHL ECX,$08
  AND ECX,$0000FFFF
  LEA ESI,[ECX+DivMul255Table]

  { Red component }

  // Result.R := PB[B.R];
  XOR EAX,EAX
  MOV AL,[ESP+$06]
  MOV CL,[EDX+EAX]
  MOV [ESP+$0A],CL

  // X := F.R - Result.R;
  MOV AL,[ESP+$02]
  XOR EBX,EBX
  MOV BL,CL
  SUB EAX,EBX

  // if X >= 0 then
  JL @5

  // Result.R := PR[PF[X] + Result.R]
  MOVZX EAX,BYTE PTR[EDI+EAX]
  AND ECX,$000000FF
  ADD EAX,ECX
  MOV AL,[ESI+EAX]
  MOV [ESP+$0A],AL
  JMP @6

@5:
  // Result.R := PR[Result.R - PF[-X]];
  NEG EAX
  MOVZX EAX,BYTE PTR[EDI+EAX]
  XOR ECX,ECX
  MOV CL,[ESP+$0A]
  SUB ECX,EAX
  MOV AL,[ESI+ECX]
  MOV [ESP+$0A],AL

  { Green component }

@6:
  // Result.G := PB[B.G];
  XOR EAX,EAX
  MOV AL,[ESP+$05]
  MOV CL,[EDX+EAX]
  MOV [ESP+$09],CL

  // X := F.G - Result.G;
  MOV AL,[ESP+$01]
  XOR EBX,EBX
  MOV BL,CL
  SUB EAX,EBX

  // if X >= 0 then
  JL @7

  // Result.G := PR[PF[X] + Result.G]
  MOVZX EAX,BYTE PTR[EDI+EAX]
  AND ECX,$000000FF
  ADD EAX,ECX
  MOV AL,[ESI+EAX]
  MOV [ESP+$09],AL
  JMP @8

@7:
  // Result.G := PR[Result.G - PF[-X]];
  NEG EAX
  MOVZX EAX,BYTE PTR[EDI+EAX]
  XOR ECX,ECX
  MOV CL,[ESP+$09]
  SUB ECX,EAX
  MOV AL,[ESI+ECX]
  MOV [ESP+$09],AL

  { Blue component }

@8:
  // Result.B := PB[B.B];
  XOR EAX,EAX
  MOV AL,[ESP+$04]
  MOV CL,[EDX+EAX]
  MOV [ESP+$08],CL

  // X := F.B - Result.B;
  MOV AL,[ESP]
  XOR EDX,EDX
  MOV DL,CL
  SUB EAX,EDX

  // if X >= 0 then
  JL @9

  // Result.B := PR[PF[X] + Result.B]
  MOVZX EAX,BYTE PTR[EDI+EAX]
  XOR EDX,EDX
  MOV DL,CL
  ADD EAX,EDX
  MOV AL,[ESI+EAX]
  MOV [ESP+$08],AL
  JMP @10

@9:
  // Result.B := PR[Result.B - PF[-X]];
  NEG EAX
  MOVZX EAX,BYTE PTR[EDI+EAX]
  XOR EDX,EDX
  MOV DL,CL
  SUB EDX,EAX
  MOV AL,[ESI+EDX]
  MOV [ESP+$08],AL

@10:
  // EAX <- Result
  MOV EAX,[ESP+$08]

  // end;
  ADD ESP,$0C
  POP EDI
  POP ESI
  POP EBX
  RET

@blend:
  CALL DWORD PTR [BlendReg]
  OR EAX,$FF000000
  RET

@exit0:
  MOV EAX,EDX

@Exit:
end;
{$ENDIF}
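
(*
For reference, a direct (slow, floating-point) evaluation of the merge
formula above for a single color channel; the assembler version arrives at
the same result via the MulDiv255Table/DivMul255Table lookups
("MergeChannel" is an illustrative name, with all inputs in 0..255):

function MergeChannel(Fc, Fa, Bc, Ba: Byte): Byte;
var
  Ra: Double;
begin
  Ra := Fa / 255 + (Ba / 255) * (1 - Fa / 255);    // Ra = Fa + Ba * (1 - Fa)
  if Ra = 0 then
    Result := 0
  else
    Result := Round(255 * ((Fa / 255) * (Fc / 255 - (Bc / 255) * (Ba / 255)) +
      (Bc / 255) * (Ba / 255)) / Ra);
end;
*)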
//------------------------------------------------------------------------------
//
// Combine
//
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// CombineReg
//------------------------------------------------------------------------------
function CombineReg_ASM(X, Y: TColor32; W: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Combine the RGBA channels of colors X and Y with the weight of X given
  // in W; all channels are combined, including alpha:
  //
  //   Result Z = W * X + (1 - W) * Y

{$IFDEF TARGET_x86}
  // EAX <- X
  // EDX <- Y
  // ECX <- W

  // W = 0 or $FF ?
  JCXZ @1                 // W = 0 ? => Result := EDX
  CMP ECX,$FF             // W = $FF ? => Result := EAX
  JE @2

  PUSH EBX

  // P = W * X
  MOV EBX,EAX             // EBX <- Xa Xr Xg Xb
  AND EAX,$00FF00FF       // EAX <- 00 Xr 00 Xb
  AND EBX,$FF00FF00       // EBX <- Xa 00 Xg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Xa 00 Xg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  XOR ECX,$000000FF       // ECX <- 1 - ECX
  MOV EBX,EDX             // EBX <- Ya Yr Yg Yb

  // Q = W * Y
  AND EDX,$00FF00FF       // EDX <- 00 Yr 00 Yb
  AND EBX,$FF00FF00       // EBX <- Ya 00 Yg 00
  IMUL EDX,ECX            // EDX <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ya 00 Yg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,EDX              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb

  POP EBX
  RET

@1: MOV EAX,EDX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- X
  // EDX <- Y
  // R8D <- W

  // W = 0 or $FF ?
  TEST R8D,R8D
  JZ @1                   // W = 0 ? => Result := EDX
  MOV EAX,ECX             // EAX <- Xa Xr Xg Xb
  CMP R8B,$FF             // W = $FF ? => Result := EAX
  JE @2

  // P = W * X
  AND EAX,$00FF00FF       // EAX <- 00 Xr 00 Xb
  AND ECX,$FF00FF00       // ECX <- Xa 00 Xg 00
  IMUL EAX,R8D            // EAX <- Pr ** Pb **
  SHR ECX,8               // ECX <- 00 Xa 00 Xg
  IMUL ECX,R8D            // ECX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Pa 00 Pg 00
  OR EAX,ECX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  XOR R8D,$000000FF       // R8D <- 1 - R8D
  MOV ECX,EDX             // ECX <- Ya Yr Yg Yb

  // Q = W * Y
  AND EDX,$00FF00FF       // EDX <- 00 Yr 00 Yb
  AND ECX,$FF00FF00       // ECX <- Ya 00 Yg 00
  IMUL EDX,R8D            // EDX <- Qr ** Qb **
  SHR ECX,8               // ECX <- 00 Ya 00 Yg
  IMUL ECX,R8D            // ECX <- Qa ** Qg **
  ADD EDX,bias
  AND EDX,$FF00FF00       // EDX <- Qr 00 Qb 00
  SHR EDX,8               // EDX <- 00 Qr 00 Qb
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Qa 00 Qg 00
  OR ECX,EDX              // ECX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,ECX             // EAX <- Za Zr Zg Zb
  RET

@1: MOV EAX,EDX
@2:
{$ENDIF}
end;
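
(*
A pure-Pascal sketch of the combine above ("CombineReg_Reference" is an
illustrative name; W is assumed to be in 0..255). Note that, unlike the
blend routines, all four channels including alpha are interpolated and the
result's alpha is not forced to $FF:

function CombineReg_Reference(X, Y: TColor32; W: Cardinal): TColor32;
var
  P, Q: Cardinal;
begin
  if W = 0 then
    Result := Y
  else
  if W = $FF then
    Result := X
  else
  begin
    P := ((((X and $00FF00FF) * W + bias) and $FF00FF00) shr 8)
      or ((((X and $FF00FF00) shr 8) * W + bias) and $FF00FF00);
    W := W xor $FF;   // 255 - W
    Q := ((((Y and $00FF00FF) * W + bias) and $FF00FF00) shr 8)
      or ((((Y and $FF00FF00) shr 8) * W + bias) and $FF00FF00);
    Result := P + Q;
  end;
end;
*)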
//------------------------------------------------------------------------------
// CombineMem
//------------------------------------------------------------------------------
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- X
  // [EDX] <- Y
  // ECX <- W

  // Check W
  JCXZ @1                 // W = 0 ? => write nothing
  CMP ECX,$FF             // W = $FF ? => write X
{$IFDEF FPC}
  DB $74,$76              // JZ @2, hand-encoded to work around a problem
                          // with FPC 2.2.2 and below
{$ELSE}
  JZ @2
{$ENDIF}

  PUSH EBX
  PUSH ESI

  // P = W * X
  MOV EBX,EAX             // EBX <- Xa Xr Xg Xb
  AND EAX,$00FF00FF       // EAX <- 00 Xr 00 Xb
  AND EBX,$FF00FF00       // EBX <- Xa 00 Xg 00
  IMUL EAX,ECX            // EAX <- Pr ** Pb **
  SHR EBX,8               // EBX <- 00 Xa 00 Xg
  IMUL EBX,ECX            // EBX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Pa 00 Pg 00
  OR EAX,EBX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  MOV ESI,[EDX]
  XOR ECX,$000000FF       // ECX <- 1 - ECX

  // Q = W * Y
  MOV EBX,ESI             // EBX <- Ya Yr Yg Yb
  AND ESI,$00FF00FF       // ESI <- 00 Yr 00 Yb
  AND EBX,$FF00FF00       // EBX <- Ya 00 Yg 00
  IMUL ESI,ECX            // ESI <- Qr ** Qb **
  SHR EBX,8               // EBX <- 00 Ya 00 Yg
  IMUL EBX,ECX            // EBX <- Qa ** Qg **
  ADD ESI,bias
  AND ESI,$FF00FF00       // ESI <- Qr 00 Qb 00
  SHR ESI,8               // ESI <- 00 Qr 00 Qb
  ADD EBX,bias
  AND EBX,$FF00FF00       // EBX <- Qa 00 Qg 00
  OR EBX,ESI              // EBX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,EBX             // EAX <- Za Zr Zg Zb
  MOV [EDX],EAX

  POP ESI
  POP EBX
@1: RET

@2: MOV [EDX],EAX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- X
  // [RDX] <- Y
  // R8D <- W

  // Check W
  TEST R8D,R8D            // set flags for R8
  JZ @2                   // W = 0 ? => write nothing
  MOV EAX,ECX             // EAX <- Xa Xr Xg Xb
  CMP R8B,$FF             // W = $FF ? => write X
  JZ @1

  // P = W * X
  AND EAX,$00FF00FF       // EAX <- 00 Xr 00 Xb
  AND ECX,$FF00FF00       // ECX <- Xa 00 Xg 00
  IMUL EAX,R8D            // EAX <- Pr ** Pb **
  SHR ECX,8               // ECX <- 00 Xa 00 Xg
  IMUL ECX,R8D            // ECX <- Pa ** Pg **
  ADD EAX,bias
  AND EAX,$FF00FF00       // EAX <- Pr 00 Pb 00
  SHR EAX,8               // EAX <- 00 Pr 00 Pb
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Pa 00 Pg 00
  OR EAX,ECX              // EAX <- Pa Pr Pg Pb

  // W = 1 - W
  MOV R9D,[RDX]
  XOR R8D,$000000FF       // R8D <- 1 - R8D

  // Q = W * Y
  MOV ECX,R9D             // ECX <- Ya Yr Yg Yb
  AND R9D,$00FF00FF       // R9D <- 00 Yr 00 Yb
  AND ECX,$FF00FF00       // ECX <- Ya 00 Yg 00
  IMUL R9D,R8D            // R9D <- Qr ** Qb **
  SHR ECX,8               // ECX <- 00 Ya 00 Yg
  IMUL ECX,R8D            // ECX <- Qa ** Qg **
  ADD R9D,bias
  AND R9D,$FF00FF00       // R9D <- Qr 00 Qb 00
  SHR R9D,8               // R9D <- 00 Qr 00 Qb
  ADD ECX,bias
  AND ECX,$FF00FF00       // ECX <- Qa 00 Qg 00
  OR ECX,R9D              // ECX <- Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
  ADD EAX,ECX             // EAX <- Za Zr Zg Zb

@1: MOV [RDX],EAX
@2:
{$ENDIF}
end;
//------------------------------------------------------------------------------
//
// Misc.
//
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
//
// Bindings
//
//------------------------------------------------------------------------------
{$IFNDEF PUREPASCAL}
procedure RegisterBindingFunctions;
begin
  BlendRegistry[@@CombineReg].Add(@CombineReg_ASM, [isAssembler]).Name := 'CombineReg_ASM';
  BlendRegistry[@@CombineMem].Add(@CombineMem_ASM, [isAssembler]).Name := 'CombineMem_ASM';
  BlendRegistry[@@BlendReg].Add(@BlendReg_ASM, [isAssembler]).Name := 'BlendReg_ASM';
  BlendRegistry[@@BlendMem].Add(@BlendMem_ASM, [isAssembler]).Name := 'BlendMem_ASM';
  BlendRegistry[@@BlendMems].Add(@BlendMems_ASM, [isAssembler]).Name := 'BlendMems_ASM';
  BlendRegistry[@@BlendRegEx].Add(@BlendRegEx_ASM, [isAssembler]).Name := 'BlendRegEx_ASM';
{$IFDEF TARGET_x86}
  BlendRegistry[@@BlendMemEx].Add(@BlendMemEx_ASM, [isAssembler]).Name := 'BlendMemEx_ASM'; // Implemented on x64 but broken
{$ENDIF}
  BlendRegistry[@@BlendLine].Add(@BlendLine_ASM, [isAssembler]).Name := 'BlendLine_ASM';
{$IFNDEF TARGET_x64}
  BlendRegistry[@@MergeReg].Add(@MergeReg_ASM, [isAssembler]).Name := 'MergeReg_ASM';
{$ENDIF}
end;
{$ENDIF}

//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------

initialization
{$IFNDEF PUREPASCAL}
  RegisterBindingFunctions;
{$ENDIF}
end.