GR32_Blend.pas 116 KB


  1. unit GR32_Blend;
  2. (* ***** BEGIN LICENSE BLOCK *****
  3. * Version: MPL 1.1 or LGPL 2.1 with linking exception
  4. *
  5. * The contents of this file are subject to the Mozilla Public License Version
  6. * 1.1 (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. * http://www.mozilla.org/MPL/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * Alternatively, the contents of this file may be used under the terms of the
  16. * Free Pascal modified version of the GNU Lesser General Public License
  17. * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
  18. * of this license are applicable instead of those above.
  19. * Please see the file LICENSE.txt for additional information concerning this
  20. * license.
  21. *
  22. * The Original Code is Graphics32
  23. *
  24. * The Initial Developer of the Original Code is
  25. * Alex A. Denisov
  26. *
  27. * Portions created by the Initial Developer are Copyright (C) 2000-2009
  28. * the Initial Developer. All Rights Reserved.
  29. *
  30. * Contributor(s):
  31. * Mattias Andersson
  32. * - 2004/07/07 - MMX Blendmodes
  33. * - 2004/12/10 - _MergeReg, M_MergeReg
  34. *
  35. * Michael Hansen <[email protected]>
  36. * - 2004/07/07 - Pascal Blendmodes, function setup
  37. * - 2005/08/19 - New merge table concept and reference implementations
  38. *
  39. * Bob Voigt
  40. * - 2004/08/25 - ColorDiv
  41. *
  42. * ***** END LICENSE BLOCK ***** *)
interface
{$I GR32.inc}
uses
  GR32, GR32_Bindings, SysUtils;
var
  MMX_ACTIVE: Boolean;
type
  { Function Prototypes }
  // Naming used below: "Reg" variants return the result as a function value,
  // "Mem" variants write the result back through a var parameter,
  // "Line" variants walk Count consecutive pixels via pointers.

  // F = foreground color, B = background color.
  TBlendReg = function(F, B: TColor32): TColor32;
  TBlendMem = procedure(F: TColor32; var B: TColor32);
  // One foreground color applied to Count pixels starting at B.
  TBlendMems = procedure(F: TColor32; B: PColor32; Count: Integer);
  // "Ex" variants take an additional master alpha M.
  TBlendRegEx = function(F, B, M: TColor32): TColor32;
  TBlendMemEx = procedure(F: TColor32; var B: TColor32; M: TColor32);
  // "RGB" variants take a packed weight W carrying one weight per channel.
  TBlendRegRGB = function(F, B, W: TColor32): TColor32;
  TBlendMemRGB = procedure(F: TColor32; var B: TColor32; W: TColor32);
{$IFDEF TEST_BLENDMEMRGB128SSE4}
  TBlendMemRGB128 = procedure(F: TColor32; var B: TColor32; W: UInt64);
{$ENDIF}
  TBlendLine = procedure(Src, Dst: PColor32; Count: Integer);
  TBlendLineEx = procedure(Src, Dst: PColor32; Count: Integer; M: TColor32);
  // Combine: mix X and Y with a single weight W.
  TCombineReg = function(X, Y, W: TColor32): TColor32;
  TCombineMem = procedure(X: TColor32; var Y: TColor32; W: TColor32);
  TCombineLine = procedure(Src, Dst: PColor32; Count: Integer; W: TColor32);
  TLightenReg = function(C: TColor32; Amount: Integer): TColor32;
var
{$IFNDEF OMIT_MMX}
  // When MMX is not omitted EMMS is a rebindable procedure variable;
  // with OMIT_MMX it is declared further down as a plain procedure.
  EMMS: procedure;
{$ENDIF}
  { Function Variables }
  // Callers go through these variables; the concrete routine bound to each
  // one is assigned outside the part of the unit shown here.
  BlendReg: TBlendReg;
  BlendMem: TBlendMem;
  BlendMems: TBlendMems;
  BlendRegEx: TBlendRegEx;
  BlendMemEx: TBlendMemEx;
  BlendRegRGB: TBlendRegRGB;
  BlendMemRGB: TBlendMemRGB;
{$IFDEF TEST_BLENDMEMRGB128SSE4}
  BlendMemRGB128: TBlendMemRGB128;
{$ENDIF}
  BlendLine: TBlendLine;
  BlendLineEx: TBlendLineEx;
  CombineReg: TCombineReg;
  CombineMem: TCombineMem;
  CombineLine: TCombineLine;
  MergeReg: TBlendReg;
  MergeMem: TBlendMem;
  MergeRegEx: TBlendRegEx;
  MergeMemEx: TBlendMemEx;
  MergeLine: TBlendLine;
  MergeLineEx: TBlendLineEx;
  { Color algebra functions }
  ColorAdd: TBlendReg;
  ColorSub: TBlendReg;
  ColorDiv: TBlendReg;
  ColorModulate: TBlendReg;
  ColorMax: TBlendReg;
  ColorMin: TBlendReg;
  ColorDifference: TBlendReg;
  ColorAverage: TBlendReg;
  ColorExclusion: TBlendReg;
  ColorScale: TBlendReg;
  { Special LUT pointers }
  AlphaTable: Pointer;
  bias_ptr: Pointer;
  alpha_ptr: Pointer;
  { Misc stuff }
  LightenReg: TLightenReg;
function Lighten(C: TColor32; Amount: Integer): TColor32; {$IFDEF USEINLINING} inline; {$ENDIF}
{ Access to alpha composite functions corresponding to a combine mode }
// Each entry is the address of a function *variable* (@@), not of a routine,
// so a lookup through these tables always sees the currently bound
// implementation. The first element maps to the Blend* family, the second
// to the Merge* family.
const
  BLEND_REG: array[TCombineMode] of ^TBlendReg = ((@@BlendReg),(@@MergeReg));
  BLEND_MEM: array[TCombineMode] of ^TBlendMem = ((@@BlendMem),(@@MergeMem));
  BLEND_REG_EX: array[TCombineMode] of ^TBlendRegEx = ((@@BlendRegEx),(@@MergeRegEx));
  BLEND_MEM_EX: array[TCombineMode] of ^TBlendMemEx = ((@@BlendMemEx),(@@MergeMemEx));
  BLEND_LINE: array[TCombineMode] of ^TBlendLine = ((@@BlendLine),(@@MergeLine));
  BLEND_LINE_EX: array[TCombineMode] of ^TBlendLineEx = ((@@BlendLineEx),(@@MergeLineEx));
var
  BlendRegistry: TFunctionRegistry;
{$IFDEF OMIT_MMX}
procedure EMMS; {$IFDEF USEINLINING} inline; {$ENDIF}
{$ENDIF}
var
  // Byte-by-byte lookup tables used by the Pascal blend/merge routines.
  // They are filled outside the part of the unit shown here.
  RcTable: array [Byte, Byte] of Byte;
  DivTable: array [Byte, Byte] of Byte;
  127. implementation
  128. uses
  129. GR32_System;
{$IFDEF OMIT_MMX}
// With OMIT_MMX defined EMMS is a real (empty) procedure instead of a
// procedure variable, so existing calls to EMMS still compile and do nothing.
procedure EMMS;
begin
end;
{$ENDIF}
  135. { Pure Pascal }
  136. function BlendReg_Pas(F, B: TColor32): TColor32;
  137. var
  138. FX: TColor32Entry absolute F;
  139. BX: TColor32Entry absolute B;
  140. Af, Ab: PByteArray;
  141. FA : Byte;
  142. begin
  143. FA := FX.A;
  144. if FA = 0 then
  145. begin
  146. Result := B;
  147. Exit;
  148. end;
  149. if FA = $FF then
  150. begin
  151. Result := F;
  152. Exit;
  153. end;
  154. with BX do
  155. begin
  156. Af := @DivTable[FA];
  157. Ab := @DivTable[not FA];
  158. R := Af[FX.R] + Ab[R];
  159. G := Af[FX.G] + Ab[G];
  160. B := Af[FX.B] + Ab[B];
  161. end;
  162. Result := B;
  163. end;
  164. procedure BlendMem_Pas(F: TColor32; var B: TColor32);
  165. var
  166. FX: TColor32Entry absolute F;
  167. BX: TColor32Entry absolute B;
  168. Af, Ab: PByteArray;
  169. FA : Byte;
  170. begin
  171. FA := FX.A;
  172. if FA = 0 then Exit;
  173. if FA = $FF then
  174. begin
  175. B := F;
  176. Exit;
  177. end;
  178. with BX do
  179. begin
  180. Af := @DivTable[FA];
  181. Ab := @DivTable[not FA];
  182. R := Af[FX.R] + Ab[R];
  183. G := Af[FX.G] + Ab[G];
  184. B := Af[FX.B] + Ab[B];
  185. end;
  186. end;
  187. procedure BlendMems_Pas(F: TColor32; B: PColor32; Count: Integer);
  188. begin
  189. while Count > 0 do
  190. begin
  191. BlendMem(F, B^);
  192. Inc(B);
  193. Dec(Count);
  194. end;
  195. end;
  196. function BlendRegEx_Pas(F, B, M: TColor32): TColor32;
  197. var
  198. FX: TColor32Entry absolute F;
  199. BX: TColor32Entry absolute B;
  200. Af, Ab: PByteArray;
  201. begin
  202. Af := @DivTable[M];
  203. M := Af[FX.A];
  204. if M = 0 then
  205. begin
  206. Result := B;
  207. Exit;
  208. end;
  209. if M = $FF then
  210. begin
  211. Result := F;
  212. Exit;
  213. end;
  214. with BX do
  215. begin
  216. Af := @DivTable[M];
  217. Ab := @DivTable[255 - M];
  218. R := Af[FX.R] + Ab[R];
  219. G := Af[FX.G] + Ab[G];
  220. B := Af[FX.B] + Ab[B];
  221. end;
  222. Result := B;
  223. end;
  224. procedure BlendMemEx_Pas(F: TColor32; var B: TColor32; M: TColor32);
  225. var
  226. FX: TColor32Entry absolute F;
  227. BX: TColor32Entry absolute B;
  228. Af, Ab: PByteArray;
  229. begin
  230. Af := @DivTable[M];
  231. M := Af[FX.A];
  232. if M = 0 then
  233. begin
  234. Exit;
  235. end;
  236. if M = $FF then
  237. begin
  238. B := F;
  239. Exit;
  240. end;
  241. with BX do
  242. begin
  243. Af := @DivTable[M];
  244. Ab := @DivTable[255 - M];
  245. R := Af[FX.R] + Ab[R];
  246. G := Af[FX.G] + Ab[G];
  247. B := Af[FX.B] + Ab[B];
  248. end;
  249. end;
  250. function BlendRegRGB_Pas(F, B, W: TColor32): TColor32;
  251. var
  252. FX: TColor32Entry absolute F;
  253. BX: TColor32Entry absolute B;
  254. WX: TColor32Entry absolute W;
  255. RX: TColor32Entry absolute Result;
  256. begin
  257. RX.R := (FX.R - BX.R) * WX.B div 255 + BX.R;
  258. RX.G := (FX.G - BX.G) * WX.G div 255 + BX.G;
  259. RX.B := (FX.B - BX.B) * WX.R div 255 + BX.B;
  260. end;
  261. procedure BlendMemRGB_Pas(F: TColor32; var B: TColor32; W: TColor32);
  262. var
  263. FX: TColor32Entry absolute F;
  264. BX: TColor32Entry absolute B;
  265. WX: TColor32Entry absolute W;
  266. begin
  267. BX.R := (FX.R - BX.R) * WX.B div 255 + BX.R;
  268. BX.G := (FX.G - BX.G) * WX.G div 255 + BX.G;
  269. BX.B := (FX.B - BX.B) * WX.R div 255 + BX.B;
  270. end;
  271. procedure BlendLine_Pas(Src, Dst: PColor32; Count: Integer);
  272. begin
  273. while Count > 0 do
  274. begin
  275. BlendMem(Src^, Dst^);
  276. Inc(Src);
  277. Inc(Dst);
  278. Dec(Count);
  279. end;
  280. end;
  281. procedure BlendLineEx_Pas(Src, Dst: PColor32; Count: Integer; M: TColor32);
  282. begin
  283. while Count > 0 do
  284. begin
  285. BlendMemEx(Src^, Dst^, M);
  286. Inc(Src);
  287. Inc(Dst);
  288. Dec(Count);
  289. end;
  290. end;
  291. function CombineReg_Pas(X, Y, W: TColor32): TColor32;
  292. var
  293. Xe: TColor32Entry absolute X;
  294. Ye: TColor32Entry absolute Y;
  295. Af, Ab: PByteArray;
  296. begin
  297. if W = 0 then
  298. begin
  299. Result := Y;
  300. Exit;
  301. end;
  302. if W >= $FF then
  303. begin
  304. Result := X;
  305. Exit;
  306. end;
  307. with Xe do
  308. begin
  309. Af := @DivTable[W];
  310. Ab := @DivTable[255 - W];
  311. R := Ab[Ye.R] + Af[R];
  312. G := Ab[Ye.G] + Af[G];
  313. B := Ab[Ye.B] + Af[B];
  314. end;
  315. Result := X;
  316. end;
  317. procedure CombineMem_Pas(X: TColor32; var Y: TColor32; W: TColor32);
  318. var
  319. Xe: TColor32Entry absolute X;
  320. Ye: TColor32Entry absolute Y;
  321. Af, Ab: PByteArray;
  322. begin
  323. if W = 0 then
  324. begin
  325. Exit;
  326. end;
  327. if W >= $FF then
  328. begin
  329. Y := X;
  330. Exit;
  331. end;
  332. with Xe do
  333. begin
  334. Af := @DivTable[W];
  335. Ab := @DivTable[255 - W];
  336. R := Ab[Ye.R] + Af[R];
  337. G := Ab[Ye.G] + Af[G];
  338. B := Ab[Ye.B] + Af[B];
  339. end;
  340. Y := X;
  341. end;
  342. procedure CombineLine_Pas(Src, Dst: PColor32; Count: Integer; W: TColor32);
  343. begin
  344. while Count > 0 do
  345. begin
  346. CombineMem(Src^, Dst^, W);
  347. Inc(Src);
  348. Inc(Dst);
  349. Dec(Count);
  350. end;
  351. end;
  352. function MergeReg_Pas(F, B: TColor32): TColor32;
  353. var
  354. Fa, Ba, Wa: TColor32;
  355. Fw, Bw: PByteArray;
  356. Fx: TColor32Entry absolute F;
  357. Bx: TColor32Entry absolute B;
  358. Rx: TColor32Entry absolute Result;
  359. begin
  360. Fa := F shr 24;
  361. Ba := B shr 24;
  362. if Fa = $FF then
  363. Result := F
  364. else if Fa = $0 then
  365. Result := B
  366. else if Ba = $0 then
  367. Result := F
  368. else
  369. begin
  370. Rx.A := DivTable[Fa xor 255, Ba xor 255] xor 255;
  371. Wa := RcTable[Rx.A, Fa];
  372. Fw := @DivTable[Wa];
  373. Bw := @DivTable[Wa xor $FF];
  374. Rx.R := Fw[Fx.R] + Bw[Bx.R];
  375. Rx.G := Fw[Fx.G] + Bw[Bx.G];
  376. Rx.B := Fw[Fx.B] + Bw[Bx.B];
  377. end;
  378. end;
  379. function MergeRegEx_Pas(F, B, M: TColor32): TColor32;
  380. begin
  381. Result := MergeReg(DivTable[M, F shr 24] shl 24 or F and $00FFFFFF, B);
  382. end;
  383. procedure MergeMem_Pas(F: TColor32; var B: TColor32);
  384. begin
  385. B := MergeReg(F, B);
  386. end;
  387. procedure MergeMemEx_Pas(F: TColor32; var B: TColor32; M: TColor32);
  388. begin
  389. B := MergeReg(DivTable[M, F shr 24] shl 24 or F and $00FFFFFF, B);
  390. end;
  391. procedure MergeLine_Pas(Src, Dst: PColor32; Count: Integer);
  392. begin
  393. while Count > 0 do
  394. begin
  395. Dst^ := MergeReg(Src^, Dst^);
  396. Inc(Src);
  397. Inc(Dst);
  398. Dec(Count);
  399. end;
  400. end;
  401. procedure MergeLineEx_Pas(Src, Dst: PColor32; Count: Integer; M: TColor32);
  402. var
  403. PM: PByteArray absolute M;
  404. begin
  405. PM := @DivTable[M];
  406. while Count > 0 do
  407. begin
  408. Dst^ := MergeReg((PM[Src^ shr 24] shl 24) or (Src^ and $00FFFFFF), Dst^);
  409. Inc(Src);
  410. Inc(Dst);
  411. Dec(Count);
  412. end;
  413. end;
// Pascal stand-in for EMMS: intentionally empty, so it can be bound where an
// EMMS procedure is expected but there is nothing to do.
procedure EMMS_Pas;
begin
  // Dummy
end;
  418. function LightenReg_Pas(C: TColor32; Amount: Integer): TColor32;
  419. var
  420. r, g, b, a: Integer;
  421. CX: TColor32Entry absolute C;
  422. RX: TColor32Entry absolute Result;
  423. begin
  424. a := CX.A;
  425. r := CX.R;
  426. g := CX.G;
  427. b := CX.B;
  428. Inc(r, Amount);
  429. Inc(g, Amount);
  430. Inc(b, Amount);
  431. if r > 255 then r := 255 else if r < 0 then r := 0;
  432. if g > 255 then g := 255 else if g < 0 then g := 0;
  433. if b > 255 then b := 255 else if b < 0 then b := 0;
  434. RX.A := a;
  435. RX.R := r;
  436. RX.G := g;
  437. RX.B := b;
  438. end;
  439. { Color algebra }
  440. function ColorAdd_Pas(C1, C2: TColor32): TColor32;
  441. var
  442. r1, g1, b1, a1: Integer;
  443. r2, g2, b2, a2: Integer;
  444. begin
  445. a1 := C1 shr 24;
  446. r1 := C1 and $00FF0000;
  447. g1 := C1 and $0000FF00;
  448. b1 := C1 and $000000FF;
  449. a2 := C2 shr 24;
  450. r2 := C2 and $00FF0000;
  451. g2 := C2 and $0000FF00;
  452. b2 := C2 and $000000FF;
  453. a1 := a1 + a2;
  454. r1 := r1 + r2;
  455. g1 := g1 + g2;
  456. b1 := b1 + b2;
  457. if a1 > $FF then a1 := $FF;
  458. if r1 > $FF0000 then r1 := $FF0000;
  459. if g1 > $FF00 then g1 := $FF00;
  460. if b1 > $FF then b1 := $FF;
  461. Result := a1 shl 24 + r1 + g1 + b1;
  462. end;
  463. function ColorSub_Pas(C1, C2: TColor32): TColor32;
  464. var
  465. r1, g1, b1, a1: Integer;
  466. r2, g2, b2, a2: Integer;
  467. begin
  468. a1 := C1 shr 24;
  469. r1 := C1 and $00FF0000;
  470. g1 := C1 and $0000FF00;
  471. b1 := C1 and $000000FF;
  472. r1 := r1 shr 16;
  473. g1 := g1 shr 8;
  474. a2 := C2 shr 24;
  475. r2 := C2 and $00FF0000;
  476. g2 := C2 and $0000FF00;
  477. b2 := C2 and $000000FF;
  478. r2 := r2 shr 16;
  479. g2 := g2 shr 8;
  480. a1 := a1 - a2;
  481. r1 := r1 - r2;
  482. g1 := g1 - g2;
  483. b1 := b1 - b2;
  484. if a1 < 0 then a1 := 0;
  485. if r1 < 0 then r1 := 0;
  486. if g1 < 0 then g1 := 0;
  487. if b1 < 0 then b1 := 0;
  488. Result := a1 shl 24 + r1 shl 16 + g1 shl 8 + b1;
  489. end;
  490. function ColorDiv_Pas(C1, C2: TColor32): TColor32;
  491. var
  492. r1, g1, b1, a1: Integer;
  493. r2, g2, b2, a2: Integer;
  494. begin
  495. a1 := C1 shr 24;
  496. r1 := (C1 and $00FF0000) shr 16;
  497. g1 := (C1 and $0000FF00) shr 8;
  498. b1 := C1 and $000000FF;
  499. a2 := C2 shr 24;
  500. r2 := (C2 and $00FF0000) shr 16;
  501. g2 := (C2 and $0000FF00) shr 8;
  502. b2 := C2 and $000000FF;
  503. if a1 = 0 then a1:=$FF
  504. else a1 := (a2 shl 8) div a1;
  505. if r1 = 0 then r1:=$FF
  506. else r1 := (r2 shl 8) div r1;
  507. if g1 = 0 then g1:=$FF
  508. else g1 := (g2 shl 8) div g1;
  509. if b1 = 0 then b1:=$FF
  510. else b1 := (b2 shl 8) div b1;
  511. if a1 > $FF then a1 := $FF;
  512. if r1 > $FF then r1 := $FF;
  513. if g1 > $FF then g1 := $FF;
  514. if b1 > $FF then b1 := $FF;
  515. Result := a1 shl 24 + r1 shl 16 + g1 shl 8 + b1;
  516. end;
  517. function ColorModulate_Pas(C1, C2: TColor32): TColor32;
  518. var
  519. REnt: TColor32Entry absolute Result;
  520. C2Ent: TColor32Entry absolute C2;
  521. begin
  522. Result := C1;
  523. REnt.A := (C2Ent.A * REnt.A) shr 8;
  524. REnt.R := (C2Ent.R * REnt.R) shr 8;
  525. REnt.G := (C2Ent.G * REnt.G) shr 8;
  526. REnt.B := (C2Ent.B * REnt.B) shr 8;
  527. end;
  528. function ColorMax_Pas(C1, C2: TColor32): TColor32;
  529. var
  530. REnt: TColor32Entry absolute Result;
  531. C2Ent: TColor32Entry absolute C2;
  532. begin
  533. Result := C1;
  534. with C2Ent do
  535. begin
  536. if A > REnt.A then REnt.A := A;
  537. if R > REnt.R then REnt.R := R;
  538. if G > REnt.G then REnt.G := G;
  539. if B > REnt.B then REnt.B := B;
  540. end;
  541. end;
  542. function ColorMin_Pas(C1, C2: TColor32): TColor32;
  543. var
  544. REnt: TColor32Entry absolute Result;
  545. C2Ent: TColor32Entry absolute C2;
  546. begin
  547. Result := C1;
  548. with C2Ent do
  549. begin
  550. if A < REnt.A then REnt.A := A;
  551. if R < REnt.R then REnt.R := R;
  552. if G < REnt.G then REnt.G := G;
  553. if B < REnt.B then REnt.B := B;
  554. end;
  555. end;
  556. function ColorDifference_Pas(C1, C2: TColor32): TColor32;
  557. var
  558. r1, g1, b1, a1: TColor32;
  559. r2, g2, b2, a2: TColor32;
  560. begin
  561. a1 := C1 shr 24;
  562. r1 := C1 and $00FF0000;
  563. g1 := C1 and $0000FF00;
  564. b1 := C1 and $000000FF;
  565. r1 := r1 shr 16;
  566. g1 := g1 shr 8;
  567. a2 := C2 shr 24;
  568. r2 := C2 and $00FF0000;
  569. g2 := C2 and $0000FF00;
  570. b2 := C2 and $000000FF;
  571. r2 := r2 shr 16;
  572. g2 := g2 shr 8;
  573. a1 := abs(a2 - a1);
  574. r1 := abs(r2 - r1);
  575. g1 := abs(g2 - g1);
  576. b1 := abs(b2 - b1);
  577. Result := a1 shl 24 + r1 shl 16 + g1 shl 8 + b1;
  578. end;
  579. function ColorExclusion_Pas(C1, C2: TColor32): TColor32;
  580. var
  581. r1, g1, b1, a1: TColor32;
  582. r2, g2, b2, a2: TColor32;
  583. begin
  584. a1 := C1 shr 24;
  585. r1 := C1 and $00FF0000;
  586. g1 := C1 and $0000FF00;
  587. b1 := C1 and $000000FF;
  588. r1 := r1 shr 16;
  589. g1 := g1 shr 8;
  590. a2 := C2 shr 24;
  591. r2 := C2 and $00FF0000;
  592. g2 := C2 and $0000FF00;
  593. b2 := C2 and $000000FF;
  594. r2 := r2 shr 16;
  595. g2 := g2 shr 8;
  596. a1 := a1 + a2 - (a1 * a2 shr 7);
  597. r1 := r1 + r2 - (r1 * r2 shr 7);
  598. g1 := g1 + g2 - (g1 * g2 shr 7);
  599. b1 := b1 + b2 - (b1 * b2 shr 7);
  600. Result := a1 shl 24 + r1 shl 16 + g1 shl 8 + b1;
  601. end;
  602. function ColorAverage_Pas(C1, C2: TColor32): TColor32;
  603. //(A + B)/2 = (A and B) + (A xor B)/2
  604. var
  605. C3 : TColor32;
  606. begin
  607. C3 := C1;
  608. C1 := C1 xor C2;
  609. C1 := C1 shr 1;
  610. C1 := C1 and $7F7F7F7F;
  611. C3 := C3 and C2;
  612. Result := C3 + C1;
  613. end;
  614. function ColorScale_Pas(C, W: TColor32): TColor32;
  615. var
  616. r1, g1, b1, a1: Cardinal;
  617. begin
  618. a1 := C shr 24;
  619. r1 := C and $00FF0000;
  620. g1 := C and $0000FF00;
  621. b1 := C and $000000FF;
  622. r1 := r1 shr 16;
  623. g1 := g1 shr 8;
  624. a1 := a1 * W shr 8;
  625. r1 := r1 * W shr 8;
  626. g1 := g1 * W shr 8;
  627. b1 := b1 * W shr 8;
  628. if a1 > 255 then a1 := 255;
  629. if r1 > 255 then r1 := 255;
  630. if g1 > 255 then g1 := 255;
  631. if b1 > 255 then b1 := 255;
  632. Result := a1 shl 24 + r1 shl 16 + g1 shl 8 + b1;
  633. end;
  634. {$IFNDEF PUREPASCAL}
  635. { Assembler versions }
  636. const
  637. bias = $00800080;
function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blend foreground color (F) onto a background color (B),
  // using the alpha channel value of F:
  //   Result Z = Fa * Frgb + (1 - Fa) * Brgb
  // All four bytes (alpha included) go through the same arithmetic.
  // Two channels are processed per 32-bit multiply (00 xx 00 yy layout);
  // each product is rounded by adding bias and keeping the high byte of
  // its 16-bit lane, i.e. the division is by 256.
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // Test Fa = 255 ?
        CMP     EAX,$FF000000   // Fa = 255 ? => Result = EAX
        JNC     @2              // unsigned EAX >= $FF000000 means Fa = 255
  // Test Fa = 0 ?
        TEST    EAX,$FF000000   // Fa = 0 ? => Result = EDX
        JZ      @1
  // Get weight W = Fa
        MOV     ECX,EAX         // ECX <- Fa Fr Fg Fb
        SHR     ECX,24          // ECX <- 00 00 00 Fa
        PUSH    EBX             // EBX must be preserved for the caller
  // P = W * F
        MOV     EBX,EAX         // EBX <- Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX <- 00 Fr 00 Fb
        AND     EBX,$FF00FF00   // EBX <- Fa 00 Fg 00
        IMUL    EAX,ECX         // EAX <- Pr ** Pb **
        SHR     EBX,8           // EBX <- 00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX <- Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX <- Pr 00 Pb 00
        SHR     EAX,8           // EAX <- 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX <- Pa 00 Pg 00
        OR      EAX,EBX         // EAX <- Pa Pr Pg Pb
  // W = 1 - W; Q = W * B
        XOR     ECX,$000000FF   // ECX <- 255 - ECX
        MOV     EBX,EDX         // EBX <- Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX <- 00 Br 00 Bb
        AND     EBX,$FF00FF00   // EBX <- Ba 00 Bg 00
        IMUL    EDX,ECX         // EDX <- Qr ** Qb **
        SHR     EBX,8           // EBX <- 00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX <- Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX <- Qr 00 Qb 00
        SHR     EDX,8           // EDX <- 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX <- Qa 00 Qg 00
        OR      EBX,EDX         // EBX <- Qa Qr Qg Qb
  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX         // EAX <- Za Zr Zg Zb
        POP     EBX
        RET
@1:     MOV     EAX,EDX         // transparent foreground: return B
@2:                             // opaque foreground: EAX already holds F
{$ENDIF}
  // x64 register usage:
  // ECX <- F
  // EDX <- B
{$IFDEF TARGET_x64}
        MOV     RAX, RCX        // work on F in EAX, as the x86 path does
  // Test Fa = 255 ?
        CMP     EAX,$FF000000   // Fa = 255 ? => Result = EAX
        JNC     @2
  // Test Fa = 0 ?
        TEST    EAX,$FF000000   // Fa = 0 ? => Result = EDX
        JZ      @1
  // Get weight W = Fa
        MOV     ECX,EAX         // ECX <- Fa Fr Fg Fb
        SHR     ECX,24          // ECX <- 00 00 00 Fa
  // P = W * F
        MOV     R9D,EAX         // R9D <- Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX <- 00 Fr 00 Fb
        AND     R9D,$FF00FF00   // R9D <- Fa 00 Fg 00
        IMUL    EAX,ECX         // EAX <- Pr ** Pb **
        SHR     R9D,8           // R9D <- 00 Fa 00 Fg
        IMUL    R9D,ECX         // R9D <- Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX <- Pr 00 Pb 00
        SHR     EAX,8           // EAX <- 00 Pr 00 Pb
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D <- Pa 00 Pg 00
        OR      EAX,R9D         // EAX <- Pa Pr Pg Pb
  // W = 1 - W; Q = W * B
        XOR     ECX,$000000FF   // ECX <- 255 - ECX
        MOV     R9D,EDX         // R9D <- Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX <- 00 Br 00 Bb
        AND     R9D,$FF00FF00   // R9D <- Ba 00 Bg 00
        IMUL    EDX,ECX         // EDX <- Qr ** Qb **
        SHR     R9D,8           // R9D <- 00 Ba 00 Bg
        IMUL    R9D,ECX         // R9D <- Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX <- Qr 00 Qb 00
        SHR     EDX,8           // EDX <- 00 Qr 00 Qb
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D <- Qa 00 Qg 00
        OR      R9D,EDX         // R9D <- Qa Qr Qg Qb
  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,R9D         // EAX <- Za Zr Zg Zb
        RET
@1:     MOV     EAX,EDX         // transparent foreground: return B
@2:                             // opaque foreground: EAX already holds F
{$ENDIF}
end;
procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // In-place blend of foreground color F onto the pixel B, using the alpha
  // channel value of F:
  //   B = Fa * F + (1 - Fa) * B      (all four bytes, alpha included)
  // Fa = 0 writes nothing, Fa = 255 stores F unchanged.
  // Products are rounded by adding bias and keeping the high byte of each
  // 16-bit lane, i.e. the division is by 256.
{$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B
  // Test Fa = 0 ?
        TEST    EAX,$FF000000   // Fa = 0 ? => do not write
        JZ      @2
  // Get weight W = Fa
        MOV     ECX,EAX         // ECX <- Fa Fr Fg Fb
        SHR     ECX,24          // ECX <- 00 00 00 Fa
  // Test Fa = 255 ?
        CMP     ECX,$FF
        JZ      @1
        PUSH    EBX             // EBX and ESI must be preserved for the caller
        PUSH    ESI
  // P = W * F
        MOV     EBX,EAX         // EBX <- Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX <- 00 Fr 00 Fb
        AND     EBX,$FF00FF00   // EBX <- Fa 00 Fg 00
        IMUL    EAX,ECX         // EAX <- Pr ** Pb **
        SHR     EBX,8           // EBX <- 00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX <- Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX <- Pr 00 Pb 00
        SHR     EAX,8           // EAX <- 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX <- Pa 00 Pg 00
        OR      EAX,EBX         // EAX <- Pa Pr Pg Pb
        MOV     ESI,[EDX]       // ESI <- current background pixel
  // W = 1 - W; Q = W * B
        XOR     ECX,$000000FF   // ECX <- 255 - ECX
        MOV     EBX,ESI         // EBX <- Ba Br Bg Bb
        AND     ESI,$00FF00FF   // ESI <- 00 Br 00 Bb
        AND     EBX,$FF00FF00   // EBX <- Ba 00 Bg 00
        IMUL    ESI,ECX         // ESI <- Qr ** Qb **
        SHR     EBX,8           // EBX <- 00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX <- Qa ** Qg **
        ADD     ESI,bias
        AND     ESI,$FF00FF00   // ESI <- Qr 00 Qb 00
        SHR     ESI,8           // ESI <- 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX <- Qa 00 Qg 00
        OR      EBX,ESI         // EBX <- Qa Qr Qg Qb
  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX         // EAX <- Za Zr Zg Zb
        MOV     [EDX],EAX
        POP     ESI
        POP     EBX
        RET
@1:     MOV     [EDX],EAX       // opaque foreground: store F as is
@2:
{$ENDIF}
{$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B
  // Test Fa = 0 ?
        TEST    ECX,$FF000000   // Fa = 0 ? => do not write
        JZ      @2
        MOV     EAX, ECX        // EAX <- Fa Fr Fg Fb
  // Get weight W = Fa
        SHR     ECX,24          // ECX <- 00 00 00 Fa
  // Test Fa = 255 ?
        CMP     ECX,$FF
        JZ      @1
  // P = W * F
        MOV     R8D,EAX         // R8D <- Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX <- 00 Fr 00 Fb
        AND     R8D,$FF00FF00   // R8D <- Fa 00 Fg 00
        IMUL    EAX,ECX         // EAX <- Pr ** Pb **
        SHR     R8D,8           // R8D <- 00 Fa 00 Fg
        IMUL    R8D,ECX         // R8D <- Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX <- Pr 00 Pb 00
        SHR     EAX,8           // EAX <- 00 Pr 00 Pb
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D <- Pa 00 Pg 00
        OR      EAX,R8D         // EAX <- Pa Pr Pg Pb
        MOV     R9D,[RDX]       // R9D <- current background pixel
  // W = 1 - W; Q = W * B
        XOR     ECX,$000000FF   // ECX <- 255 - ECX
        MOV     R8D,R9D         // R8D <- Ba Br Bg Bb
        AND     R9D,$00FF00FF   // R9D <- 00 Br 00 Bb
        AND     R8D,$FF00FF00   // R8D <- Ba 00 Bg 00
        IMUL    R9D,ECX         // R9D <- Qr ** Qb **
        SHR     R8D,8           // R8D <- 00 Ba 00 Bg
        IMUL    R8D,ECX         // R8D <- Qa ** Qg **
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D <- Qr 00 Qb 00
        SHR     R9D,8           // R9D <- 00 Qr 00 Qb
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D <- Qa 00 Qg 00
        OR      R8D,R9D         // R8D <- Qa Qr Qg Qb
  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,R8D         // EAX <- Za Zr Zg Zb
        MOV     [RDX],EAX
        RET
@1:     MOV     [RDX],EAX       // opaque foreground: store F as is
@2:
{$ENDIF}
end;
  838. procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  839. asm
  840. {$IFDEF TARGET_x86}
  841. TEST ECX,ECX
  842. JZ @Done
  843. PUSH EBX
  844. PUSH ESI
  845. PUSH EDI
  846. MOV ESI,EAX
  847. MOV EDI,EDX
  848. @LoopStart:
  849. MOV EAX,[ESI]
  850. TEST EAX,$FF000000
  851. JZ @NextPixel
  852. PUSH ECX
  853. MOV ECX,EAX
  854. SHR ECX,24
  855. CMP ECX,$FF
  856. JZ @CopyPixel
  857. MOV EBX,EAX
  858. AND EAX,$00FF00FF
  859. AND EBX,$FF00FF00
  860. IMUL EAX,ECX
  861. SHR EBX,8
  862. IMUL EBX,ECX
  863. ADD EAX,bias
  864. AND EAX,$FF00FF00
  865. SHR EAX,8
  866. ADD EBX,bias
  867. AND EBX,$FF00FF00
  868. OR EAX,EBX
  869. MOV EDX,[EDI]
  870. XOR ECX,$000000FF
  871. MOV EBX,EDX
  872. AND EDX,$00FF00FF
  873. AND EBX,$FF00FF00
  874. IMUL EDX,ECX
  875. SHR EBX,8
  876. IMUL EBX,ECX
  877. ADD EDX,bias
  878. AND EDX,$FF00FF00
  879. SHR EDX,8
  880. ADD EBX,bias
  881. AND EBX,$FF00FF00
  882. OR EBX,EDX
  883. ADD EAX,EBX
  884. @CopyPixel:
  885. OR EAX,$FF000000
  886. MOV [EDI],EAX
  887. POP ECX
  888. @NextPixel:
  889. ADD ESI,4
  890. ADD EDI,4
  891. DEC ECX
  892. JNZ @LoopStart
  893. POP EDI
  894. POP ESI
  895. POP EBX
  896. @Done:
  897. RET
  898. {$ENDIF}
  899. {$IFDEF TARGET_x64}
  900. TEST R8D,R8D
  901. JZ @Done
  902. PUSH RDI
  903. MOV R9,RCX
  904. MOV RDI,RDX
  905. @LoopStart:
  906. MOV ECX,[RSI]
  907. TEST ECX,$FF000000
  908. JZ @NextPixel
  909. PUSH R8
  910. MOV R8D,ECX
  911. SHR R8D,24
  912. CMP R8D,$FF
  913. JZ @CopyPixel
  914. MOV EAX,ECX
  915. AND ECX,$00FF00FF
  916. AND EAX,$FF00FF00
  917. IMUL ECX,R8D
  918. SHR EAX,8
  919. IMUL EAX,R8D
  920. ADD ECX,bias
  921. AND ECX,$FF00FF00
  922. SHR ECX,8
  923. ADD EAX,bias
  924. AND EAX,$FF00FF00
  925. OR ECX,EAX
  926. MOV EDX,[RDI]
  927. XOR R8D,$000000FF
  928. MOV EAX,EDX
  929. AND EDX,$00FF00FF
  930. AND EAX,$FF00FF00
  931. IMUL EDX, R8D
  932. SHR EAX,8
  933. IMUL EAX,R8D
  934. ADD EDX,bias
  935. AND EDX,$FF00FF00
  936. SHR EDX,8
  937. ADD EAX,bias
  938. AND EAX,$FF00FF00
  939. OR EAX,EDX
  940. ADD ECX,EAX
  941. @CopyPixel:
  942. OR ECX,$FF000000
  943. MOV [RDI],ECX
  944. POP R8
  945. @NextPixel:
  946. ADD R9,4
  947. ADD RDI,4
  948. DEC R8D
  949. JNZ @LoopStart
  950. POP RDI
  951. @Done:
  952. RET
  953. {$ENDIF}
  954. end;
function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // blend foreground color (F) to a background color (B),
  // using alpha channel value of F multiplied by master alpha (M)
  // no checking for M = $FF, in this case Graphics32 uses BlendReg
  // Result Z = Fa * M * Frgb + (1 - Fa * M) * Brgb
  //
  // The weight is W = ((M + 1) * Fa) shr 8. Fa = 0 or W = 0 returns B.
  // In the blended path only R, G and B are computed: the alpha lanes are
  // masked out, so the alpha byte of the result is zero there.
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- M
  // Check Fa > 0 ?
        TEST    EAX,$FF000000   // Fa = 0? => Result := EDX
        JZ      @2
        PUSH    EBX             // EBX must be preserved for the caller
  // Get weight W = Fa * M
        MOV     EBX,EAX         // EBX <- Fa Fr Fg Fb
        INC     ECX             // 255:256 range bias
        SHR     EBX,24          // EBX <- 00 00 00 Fa
        IMUL    ECX,EBX         // ECX <- 00 00 W **
        SHR     ECX,8           // ECX <- 00 00 00 W
        JZ      @1              // W = 0 ? => Result := EDX
  // P = W * F
        MOV     EBX,EAX         // EBX <- ** Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX <- 00 Fr 00 Fb
        AND     EBX,$0000FF00   // EBX <- 00 00 Fg 00
        IMUL    EAX,ECX         // EAX <- Pr ** Pb **
        SHR     EBX,8           // EBX <- 00 00 00 Fg
        IMUL    EBX,ECX         // EBX <- 00 00 Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX <- Pr 00 Pb 00
        SHR     EAX,8           // EAX <- 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$0000FF00   // EBX <- 00 00 Pg 00
        OR      EAX,EBX         // EAX <- 00 Pr Pg Pb
  // W = 1 - W; Q = W * B
        XOR     ECX,$000000FF   // ECX <- 255 - ECX
        MOV     EBX,EDX         // EBX <- Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX <- 00 Br 00 Bb
        AND     EBX,$0000FF00   // EBX <- 00 00 Bg 00
        IMUL    EDX,ECX         // EDX <- Qr ** Qb **
        SHR     EBX,8           // EBX <- 00 00 00 Bg
        IMUL    EBX,ECX         // EBX <- 00 00 Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX <- Qr 00 Qb 00
        SHR     EDX,8           // EDX <- 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$0000FF00   // EBX <- 00 00 Qg 00
        OR      EBX,EDX         // EBX <- 00 Qr Qg Qb
  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX         // EAX <- 00 Zr Zg Zb
        POP     EBX
        RET
@1:
        POP     EBX             // W = 0 was detected after EBX was pushed
@2:     MOV     EAX,EDX         // return B unchanged
{$ENDIF}
{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  // R8D <- M
        MOV     EAX,ECX         // EAX <- Fa Fr Fg Fb
        TEST    EAX,$FF000000   // Fa = 0? => Result := EDX
        JZ      @1
  // Get weight W = Fa * M
        INC     R8D             // 255:256 range bias
        SHR     ECX,24          // ECX <- 00 00 00 Fa
        IMUL    R8D,ECX         // R8D <- 00 00 W **
        SHR     R8D,8           // R8D <- 00 00 00 W
        JZ      @1              // W = 0 ? => Result := EDX
  // P = W * F
        MOV     ECX,EAX         // ECX <- ** Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX <- 00 Fr 00 Fb
        AND     ECX,$0000FF00   // ECX <- 00 00 Fg 00
        IMUL    EAX,R8D         // EAX <- Pr ** Pb **
        SHR     ECX,8           // ECX <- 00 00 00 Fg
        IMUL    ECX,R8D         // ECX <- 00 00 Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX <- Pr 00 Pb 00
        SHR     EAX,8           // EAX <- 00 Pr 00 Pb
        ADD     ECX,bias
        AND     ECX,$0000FF00   // ECX <- 00 00 Pg 00
        OR      EAX,ECX         // EAX <- 00 Pr Pg Pb
  // W = 1 - W; Q = W * B
        XOR     R8D,$000000FF   // R8D <- 255 - R8D
        MOV     ECX,EDX         // ECX <- Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX <- 00 Br 00 Bb
        AND     ECX,$0000FF00   // ECX <- 00 00 Bg 00
        IMUL    EDX,R8D         // EDX <- Qr ** Qb **
        SHR     ECX,8           // ECX <- 00 00 00 Bg
        IMUL    ECX,R8D         // ECX <- 00 00 Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX <- Qr 00 Qb 00
        SHR     EDX,8           // EDX <- 00 Qr 00 Qb
        ADD     ECX,bias
        AND     ECX,$0000FF00   // ECX <- 00 00 Qg 00
        OR      ECX,EDX         // ECX <- 00 Qr Qg Qb
  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,ECX         // EAX <- 00 Zr Zg Zb
        RET
@1:     MOV     EAX,EDX         // return B unchanged
{$ENDIF}
end;
  // BlendMemEx_ASM
  //   Blends the RGB channels of F onto the colour stored in B, in place.
  //   The blend weight is W = (Fa * (M + 1)) shr 8, where Fa is the alpha byte
  //   of F and M the master alpha. B is left untouched when Fa = 0 or W = 0.
  //   Each channel is computed as (W * F + (255 - W) * B + bias) shr 8; the
  //   alpha byte of the value written back is zero.
  procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = F, EDX = address of B, ECX = M

          TEST      EAX,$FF000000       // foreground fully transparent?
          JZ        @Leave              //   yes: nothing to write
          PUSH      EBX

    // Weight: W = (Fa * (M + 1)) shr 8
          MOV       EBX,EAX             // EBX = Fa Fr Fg Fb
          INC       ECX                 // map 0..255 onto 1..256
          SHR       EBX,24              // EBX = Fa
          IMUL      ECX,EBX
          SHR       ECX,8               // ECX = W
          JZ        @RestoreEBX         // W = 0: nothing to write
          PUSH      ESI

    // Foreground part: P = W * F
          MOV       EBX,EAX
          AND       EAX,$00FF00FF       // EAX = 00 Fr 00 Fb
          AND       EBX,$0000FF00       // EBX = 00 00 Fg 00
          IMUL      EAX,ECX             // red and blue scaled together
          SHR       EBX,8               // EBX = Fg
          IMUL      EBX,ECX             // green scaled
          ADD       EAX,bias
          AND       EAX,$FF00FF00       // keep the high byte of each product
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       EBX,bias
          AND       EBX,$0000FF00       // EBX = 00 00 Pg 00
          OR        EAX,EBX             // EAX = 00 Pr Pg Pb

    // Background part: Q = (255 - W) * B
          MOV       ESI,[EDX]           // ESI = current background
          XOR       ECX,$000000FF       // ECX = 255 - W
          MOV       EBX,ESI
          AND       ESI,$00FF00FF       // ESI = 00 Br 00 Bb
          AND       EBX,$0000FF00       // EBX = 00 00 Bg 00
          IMUL      ESI,ECX
          SHR       EBX,8               // EBX = Bg
          IMUL      EBX,ECX
          ADD       ESI,bias
          AND       ESI,$FF00FF00
          SHR       ESI,8               // ESI = 00 Qr 00 Qb
          ADD       EBX,bias
          AND       EBX,$0000FF00       // EBX = 00 00 Qg 00
          OR        EBX,ESI             // EBX = 00 Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,EBX
          MOV       [EDX],EAX

          POP       ESI
  @RestoreEBX:
          POP       EBX
  @Leave:
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = F, RDX = address of B, R8D = M

          TEST      ECX,$FF000000       // foreground fully transparent?
          JZ        @Leave              //   yes: nothing to write

    // Weight: W = (Fa * (M + 1)) shr 8
          MOV       EAX,ECX             // EAX = Fa Fr Fg Fb
          INC       R8D                 // map 0..255 onto 1..256
          SHR       EAX,24              // EAX = Fa
          IMUL      R8D,EAX
          SHR       R8D,8               // R8D = W
          JZ        @Leave              // W = 0: nothing to write

    // Foreground part: P = W * F
          MOV       EAX,ECX
          AND       ECX,$00FF00FF       // ECX = 00 Fr 00 Fb
          AND       EAX,$0000FF00       // EAX = 00 00 Fg 00
          IMUL      ECX,R8D
          SHR       EAX,8               // EAX = Fg
          IMUL      EAX,R8D
          ADD       ECX,bias
          AND       ECX,$FF00FF00
          SHR       ECX,8               // ECX = 00 Pr 00 Pb
          ADD       EAX,bias
          AND       EAX,$0000FF00       // EAX = 00 00 Pg 00
          OR        ECX,EAX             // ECX = 00 Pr Pg Pb

    // Background part: Q = (255 - W) * B
          MOV       R9D,[RDX]           // R9D = current background
          XOR       R8D,$000000FF       // R8D = 255 - W
          MOV       EAX,R9D
          AND       R9D,$00FF00FF       // R9D = 00 Br 00 Bb
          AND       EAX,$0000FF00       // EAX = 00 00 Bg 00
          IMUL      R9D,R8D
          SHR       EAX,8               // EAX = Bg
          IMUL      EAX,R8D
          ADD       R9D,bias
          AND       R9D,$FF00FF00
          SHR       R9D,8               // R9D = 00 Qr 00 Qb
          ADD       EAX,bias
          AND       EAX,$0000FF00       // EAX = 00 00 Qg 00
          OR        EAX,R9D             // EAX = 00 Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       ECX,EAX
          MOV       [RDX],ECX
  @Leave:
  {$ENDIF}
  end;
  // BlendLine_ASM
  //   Alpha-blends Count pixels from Src onto Dst, pixel by pixel.
  //   For every source pixel:
  //     Fa = 0    -> destination pixel is left unchanged
  //     Fa = 255  -> source pixel is copied as is
  //     otherwise -> every channel (alpha included) becomes
  //                  (Fa * F + (255 - Fa) * B + bias) shr 8
  //   Nothing is done when Count <= 0.
  //
  //   Fix: the entry guard used JS, which rejects only negative counts. With
  //   Count = 0 the DEC/JNZ loop wrapped around and ran roughly 2^32 times,
  //   reading and writing far outside the buffers. JLE rejects zero as well.
  procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = Src, EDX = Dst, ECX = Count

          TEST      ECX,ECX
          JLE       @Finish             // Count <= 0: nothing to do

          PUSH      EBX
          PUSH      ESI
          PUSH      EDI

          MOV       ESI,EAX             // ESI = Src
          MOV       EDI,EDX             // EDI = Dst

  @PixelLoop:
          MOV       EAX,[ESI]           // EAX = Fa Fr Fg Fb
          TEST      EAX,$FF000000
          JZ        @NextPixel          // fully transparent: skip

          PUSH      ECX                 // ECX is reused as the weight below

          MOV       ECX,EAX
          SHR       ECX,24              // ECX = Fa (the blend weight)

          CMP       ECX,$FF
          JZ        @Store              // fully opaque: copy source pixel

    // P = Fa * F (all four channels)
          MOV       EBX,EAX
          AND       EAX,$00FF00FF       // EAX = 00 Fr 00 Fb
          AND       EBX,$FF00FF00       // EBX = Fa 00 Fg 00
          IMUL      EAX,ECX
          SHR       EBX,8               // EBX = 00 Fa 00 Fg
          IMUL      EBX,ECX
          ADD       EAX,bias
          AND       EAX,$FF00FF00
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       EBX,bias
          AND       EBX,$FF00FF00       // EBX = Pa 00 Pg 00
          OR        EAX,EBX             // EAX = Pa Pr Pg Pb

    // Q = (255 - Fa) * B (all four channels)
          MOV       EDX,[EDI]           // EDX = Ba Br Bg Bb
          XOR       ECX,$000000FF       // ECX = 255 - Fa
          MOV       EBX,EDX
          AND       EDX,$00FF00FF       // EDX = 00 Br 00 Bb
          AND       EBX,$FF00FF00       // EBX = Ba 00 Bg 00
          IMUL      EDX,ECX
          SHR       EBX,8               // EBX = 00 Ba 00 Bg
          IMUL      EBX,ECX
          ADD       EDX,bias
          AND       EDX,$FF00FF00
          SHR       EDX,8               // EDX = 00 Qr 00 Qb
          ADD       EBX,bias
          AND       EBX,$FF00FF00       // EBX = Qa 00 Qg 00
          OR        EBX,EDX             // EBX = Qa Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,EBX
  @Store:
          MOV       [EDI],EAX
          POP       ECX                 // restore the pixel counter

  @NextPixel:
          ADD       ESI,4
          ADD       EDI,4

          DEC       ECX
          JNZ       @PixelLoop

          POP       EDI
          POP       ESI
          POP       EBX
  @Finish:
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  RCX = Src, RDX = Dst, R8D = Count

          TEST      R8D,R8D
          JLE       @Finish             // Count <= 0: nothing to do

          MOV       R10,RCX             // R10 = Src
          MOV       R11,RDX             // R11 = Dst
          MOV       ECX,R8D             // ECX = Count

  @PixelLoop:
          MOV       EAX,[R10]           // EAX = Fa Fr Fg Fb
          TEST      EAX,$FF000000
          JZ        @NextPixel          // fully transparent: skip

          MOV       R9D,EAX
          SHR       R9D,24              // R9D = Fa (the blend weight)

          CMP       R9D,$FF
          JZ        @Store              // fully opaque: copy source pixel

    // P = Fa * F (all four channels)
          MOV       R8D,EAX
          AND       EAX,$00FF00FF       // EAX = 00 Fr 00 Fb
          AND       R8D,$FF00FF00       // R8D = Fa 00 Fg 00
          IMUL      EAX,R9D
          SHR       R8D,8               // R8D = 00 Fa 00 Fg
          IMUL      R8D,R9D
          ADD       EAX,bias
          AND       EAX,$FF00FF00
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       R8D,bias
          AND       R8D,$FF00FF00       // R8D = Pa 00 Pg 00
          OR        EAX,R8D             // EAX = Pa Pr Pg Pb

    // Q = (255 - Fa) * B (all four channels)
          MOV       EDX,[R11]           // EDX = Ba Br Bg Bb
          XOR       R9D,$000000FF       // R9D = 255 - Fa
          MOV       R8D,EDX
          AND       EDX,$00FF00FF       // EDX = 00 Br 00 Bb
          AND       R8D,$FF00FF00       // R8D = Ba 00 Bg 00
          IMUL      EDX,R9D
          SHR       R8D,8               // R8D = 00 Ba 00 Bg
          IMUL      R8D,R9D
          ADD       EDX,bias
          AND       EDX,$FF00FF00
          SHR       EDX,8               // EDX = 00 Qr 00 Qb
          ADD       R8D,bias
          AND       R8D,$FF00FF00       // R8D = Qa 00 Qg 00
          OR        R8D,EDX             // R8D = Qa Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,R8D
  @Store:
          MOV       [R11],EAX

  @NextPixel:
          ADD       R10,4
          ADD       R11,4

          DEC       ECX
          JNZ       @PixelLoop
  @Finish:
  {$ENDIF}
  end;
  1289. {$IFDEF TARGET_x86}
  // MergeReg_ASM (x86 only)
  //   Merges foreground F with background B using the DivTable / RcTable
  //   lookup tables and returns the merged colour in EAX.
  //   Shortcuts taken before the table path:
  //     F.A = 0            -> returns B
  //     B.A = 255          -> returns BlendReg(F, B) with alpha forced to 255
  //     F.A = 255, B.A = 0 -> returns F
  //   The table path keeps F at [ESP], B at [ESP+4] and builds the result at
  //   [ESP+8] on a 12-byte scratch area.
  function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    // In:  EAX = F, EDX = B

    // if F.A = 0 then Result := B
          TEST      EAX,$FF000000
          JZ        @ReturnB

    // else if B.A = 255 then Result := BlendReg(F, B) or $FF000000
          CMP       EDX,$FF000000
          JNC       @OpaqueBackground

    // else if F.A = 255 then Result := F
          CMP       EAX,$FF000000
          JNC       @ReturnF

    // else if B.A = 0 then Result := F
          TEST      EDX,$FF000000
          JZ        @ReturnF

    // General case: table driven merge
          PUSH      EBX
          PUSH      ESI
          PUSH      EDI
          ADD       ESP,-$0C            // scratch: F, B, Result
          MOV       [ESP+$04],EDX       // [ESP+4] = B
          MOV       [ESP],EAX           // [ESP]   = F

    // EAX = F.A shl 8, DL = CL = B.A
          SHR       EAX,16
          AND       EAX,$0000FF00
          SHR       EDX,24
          MOV       CL,DL
          NOP
          NOP
          NOP

    // EDI = PF := @DivTable[F.A]
          LEA       EDI,[EAX+DivTable]

    // EDX = PB := @DivTable[B.A]
          SHL       EDX,$08
          LEA       EDX,[EDX+DivTable]

    // Result.A := B.A + F.A - PB[F.A]   (only the low byte CL is used)
          SHR       EAX,8               // EAX = F.A
          ADD       ECX,EAX
          SUB       ECX,[EDX+EAX]
          MOV       [ESP+$0B],CL

    // ESI = PR := @RcTable[Result.A]
          SHL       ECX,$08
          AND       ECX,$0000FFFF
          LEA       ESI,[ECX+RcTable]

    { ---- Red ---- }
    // Result.R := PB[B.R]
          XOR       EAX,EAX
          MOV       AL,[ESP+$06]
          MOV       CL,[EDX+EAX]
          MOV       [ESP+$0a],CL

    // X := F.R - Result.R
          MOV       AL,[ESP+$02]
          XOR       EBX,EBX
          MOV       BL,CL
          SUB       EAX,EBX
          JL        @RedNegative

    // X >= 0: Result.R := PR[PF[X] + Result.R]
          MOVZX     EAX,BYTE PTR[EDI+EAX]
          AND       ECX,$000000FF
          ADD       EAX,ECX
          MOV       AL,[ESI+EAX]
          MOV       [ESP+$0A],AL
          JMP       @Green

  @RedNegative:
    // X < 0: Result.R := PR[Result.R - PF[-X]]
          NEG       EAX
          MOVZX     EAX,BYTE PTR[EDI+EAX]
          XOR       ECX,ECX
          MOV       CL,[ESP+$0A]
          SUB       ECX,EAX
          MOV       AL,[ESI+ECX]
          MOV       [ESP+$0A],AL

    { ---- Green ---- }
  @Green:
    // Result.G := PB[B.G]
          XOR       EAX,EAX
          MOV       AL,[ESP+$05]
          MOV       CL,[EDX+EAX]
          MOV       [ESP+$09],CL

    // X := F.G - Result.G
          MOV       AL,[ESP+$01]
          XOR       EBX,EBX
          MOV       BL,CL
          SUB       EAX,EBX
          JL        @GreenNegative

    // X >= 0: Result.G := PR[PF[X] + Result.G]
          MOVZX     EAX,BYTE PTR[EDI+EAX]
          AND       ECX,$000000FF
          ADD       EAX,ECX
          MOV       AL,[ESI+EAX]
          MOV       [ESP+$09],AL
          JMP       @Blue

  @GreenNegative:
    // X < 0: Result.G := PR[Result.G - PF[-X]]
          NEG       EAX
          MOVZX     EAX,BYTE PTR[EDI+EAX]
          XOR       ECX,ECX
          MOV       CL,[ESP+$09]
          SUB       ECX,EAX
          MOV       AL,[ESI+ECX]
          MOV       [ESP+$09],AL

    { ---- Blue ---- }
  @Blue:
    // Result.B := PB[B.B]
          XOR       EAX,EAX
          MOV       AL,[ESP+$04]
          MOV       CL,[EDX+EAX]
          MOV       [ESP+$08],CL

    // X := F.B - Result.B   (EDX is free from here on: PB is no longer needed)
          MOV       AL,[ESP]
          XOR       EDX,EDX
          MOV       DL,CL
          SUB       EAX,EDX
          JL        @BlueNegative

    // X >= 0: Result.B := PR[PF[X] + Result.B]
          MOVZX     EAX,BYTE PTR[EDI+EAX]
          XOR       EDX,EDX
          MOV       DL,CL
          ADD       EAX,EDX
          MOV       AL,[ESI+EAX]
          MOV       [ESP+$08],AL
          JMP       @Assemble

  @BlueNegative:
    // X < 0: Result.B := PR[Result.B - PF[-X]]
          NEG       EAX
          MOVZX     EAX,BYTE PTR[EDI+EAX]
          XOR       EDX,EDX
          MOV       DL,CL
          SUB       EDX,EAX
          MOV       AL,[ESI+EDX]
          MOV       [ESP+$08],AL

  @Assemble:
    // Load the finished colour and unwind the scratch area
          MOV       EAX,[ESP+$08]
          ADD       ESP,$0C
          POP       EDI
          POP       ESI
          POP       EBX
          RET

  @OpaqueBackground:
    // Opaque background: ordinary blend, result stays opaque
          CALL      DWORD PTR [BlendReg]
          OR        EAX,$FF000000
          RET

  @ReturnB:
          MOV       EAX,EDX
  @ReturnF:
  end;
  1448. {$ENDIF}
  // CombineReg_ASM
  //   Linear combination of the colours X and Y with weight W given to X:
  //     Z = (W * X + (255 - W) * Y + bias) shr 8   for all four channels.
  //   W = 0 returns Y unchanged and W = $FF returns X unchanged.
  //   Note: the x86 zero test is JCXZ, which looks at the low 16 bits of W;
  //   the x64 $FF test compares only the low byte of W.
  function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = X, EDX = Y, ECX = W

          JCXZ      @ReturnY            // W = 0: Result := Y
          CMP       ECX,$FF
          JE        @ReturnX            // W = $FF: Result := X (already in EAX)

          PUSH      EBX

    // P = W * X
          MOV       EBX,EAX             // EBX = Xa Xr Xg Xb
          AND       EAX,$00FF00FF       // EAX = 00 Xr 00 Xb
          AND       EBX,$FF00FF00       // EBX = Xa 00 Xg 00
          IMUL      EAX,ECX
          SHR       EBX,8               // EBX = 00 Xa 00 Xg
          IMUL      EBX,ECX
          ADD       EAX,bias
          AND       EAX,$FF00FF00
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       EBX,bias
          AND       EBX,$FF00FF00       // EBX = Pa 00 Pg 00
          OR        EAX,EBX             // EAX = Pa Pr Pg Pb

    // Q = (255 - W) * Y
          XOR       ECX,$000000FF       // ECX = 255 - W
          MOV       EBX,EDX             // EBX = Ya Yr Yg Yb
          AND       EDX,$00FF00FF       // EDX = 00 Yr 00 Yb
          AND       EBX,$FF00FF00       // EBX = Ya 00 Yg 00
          IMUL      EDX,ECX
          SHR       EBX,8               // EBX = 00 Ya 00 Yg
          IMUL      EBX,ECX
          ADD       EDX,bias
          AND       EDX,$FF00FF00
          SHR       EDX,8               // EDX = 00 Qr 00 Qb
          ADD       EBX,bias
          AND       EBX,$FF00FF00       // EBX = Qa 00 Qg 00
          OR        EBX,EDX             // EBX = Qa Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,EBX

          POP       EBX
          RET

  @ReturnY:
          MOV       EAX,EDX
  @ReturnX:
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = X, EDX = Y, R8D = W

          TEST      R8D,R8D
          JZ        @ReturnY            // W = 0: Result := Y
          MOV       EAX,ECX             // EAX = Xa Xr Xg Xb
          CMP       R8B,$FF
          JE        @ReturnX            // W = $FF: Result := X (already in EAX)

    // P = W * X
          AND       EAX,$00FF00FF       // EAX = 00 Xr 00 Xb
          AND       ECX,$FF00FF00       // ECX = Xa 00 Xg 00
          IMUL      EAX,R8D
          SHR       ECX,8               // ECX = 00 Xa 00 Xg
          IMUL      ECX,R8D
          ADD       EAX,bias
          AND       EAX,$FF00FF00
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       ECX,bias
          AND       ECX,$FF00FF00       // ECX = Pa 00 Pg 00
          OR        EAX,ECX             // EAX = Pa Pr Pg Pb

    // Q = (255 - W) * Y
          XOR       R8D,$000000FF       // R8D = 255 - W
          MOV       ECX,EDX             // ECX = Ya Yr Yg Yb
          AND       EDX,$00FF00FF       // EDX = 00 Yr 00 Yb
          AND       ECX,$FF00FF00       // ECX = Ya 00 Yg 00
          IMUL      EDX,R8D
          SHR       ECX,8               // ECX = 00 Ya 00 Yg
          IMUL      ECX,R8D
          ADD       EDX,bias
          AND       EDX,$FF00FF00
          SHR       EDX,8               // EDX = 00 Qr 00 Qb
          ADD       ECX,bias
          AND       ECX,$FF00FF00       // ECX = Qa 00 Qg 00
          OR        ECX,EDX             // ECX = Qa Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,ECX
          RET

  @ReturnY:
          MOV       EAX,EDX
  @ReturnX:
  {$ENDIF}
  end;
  // CombineMem_ASM
  //   In-place variant of the weighted combine: Y := W * X + (255 - W) * Y,
  //   computed per channel (alpha included) as
  //   (W * X + (255 - W) * Y + bias) shr 8.
  //   W = 0 leaves Y untouched; W = 255 stores X unchanged.
  //
  //   CAUTION (x86 / FPC): the "DB $74,$76" below is a hand-encoded short JZ
  //   whose displacement ($76 bytes) must land exactly on @StoreX. Any change
  //   to the instructions between it and @StoreX requires recomputing it.
  procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = X, EDX = address of Y, ECX = W

          JCXZ      @Leave              // W = 0: write nothing
          CMP       ECX,$FF             // W = 255: store X as is
  {$IFDEF FPC}
          DB        $74,$76             // JZ @StoreX (problem with FPC 2.2.2 and below)
  {$ELSE}
          JZ        @StoreX
  {$ENDIF}

          PUSH      EBX
          PUSH      ESI

    // P = W * X
          MOV       EBX,EAX             // EBX = Xa Xr Xg Xb
          AND       EAX,$00FF00FF       // EAX = 00 Xr 00 Xb
          AND       EBX,$FF00FF00       // EBX = Xa 00 Xg 00
          IMUL      EAX,ECX
          SHR       EBX,8               // EBX = 00 Xa 00 Xg
          IMUL      EBX,ECX
          ADD       EAX,bias
          AND       EAX,$FF00FF00
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       EBX,bias
          AND       EBX,$FF00FF00       // EBX = Pa 00 Pg 00
          OR        EAX,EBX             // EAX = Pa Pr Pg Pb

    // Q = (255 - W) * Y
          MOV       ESI,[EDX]           // ESI = Ya Yr Yg Yb
          XOR       ECX,$000000FF       // ECX = 255 - W
          MOV       EBX,ESI
          AND       ESI,$00FF00FF       // ESI = 00 Yr 00 Yb
          AND       EBX,$FF00FF00       // EBX = Ya 00 Yg 00
          IMUL      ESI,ECX
          SHR       EBX,8               // EBX = 00 Ya 00 Yg
          IMUL      EBX,ECX
          ADD       ESI,bias
          AND       ESI,$FF00FF00
          SHR       ESI,8               // ESI = 00 Qr 00 Qb
          ADD       EBX,bias
          AND       EBX,$FF00FF00       // EBX = Qa 00 Qg 00
          OR        EBX,ESI             // EBX = Qa Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,EBX

          MOV       [EDX],EAX

          POP       ESI
          POP       EBX
  @Leave:
          RET

  @StoreX:
          MOV       [EDX],EAX
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = X, RDX = address of Y, R8D = W

          TEST      R8D,R8D
          JZ        @Leave              // W = 0: write nothing
          MOV       EAX,ECX             // EAX = Xa Xr Xg Xb
          CMP       R8B,$FF
          JZ        @StoreX             // W = 255: store X as is

    // P = W * X
          AND       EAX,$00FF00FF       // EAX = 00 Xr 00 Xb
          AND       ECX,$FF00FF00       // ECX = Xa 00 Xg 00
          IMUL      EAX,R8D
          SHR       ECX,8               // ECX = 00 Xa 00 Xg
          IMUL      ECX,R8D
          ADD       EAX,bias
          AND       EAX,$FF00FF00
          SHR       EAX,8               // EAX = 00 Pr 00 Pb
          ADD       ECX,bias
          AND       ECX,$FF00FF00       // ECX = Pa 00 Pg 00
          OR        EAX,ECX             // EAX = Pa Pr Pg Pb

    // Q = (255 - W) * Y
          MOV       R9D,[RDX]           // R9D = Ya Yr Yg Yb
          XOR       R8D,$000000FF       // R8D = 255 - W
          MOV       ECX,R9D
          AND       R9D,$00FF00FF       // R9D = 00 Yr 00 Yb
          AND       ECX,$FF00FF00       // ECX = Ya 00 Yg 00
          IMUL      R9D,R8D
          SHR       ECX,8               // ECX = 00 Ya 00 Yg
          IMUL      ECX,R8D
          ADD       R9D,bias
          AND       R9D,$FF00FF00
          SHR       R9D,8               // R9D = 00 Qr 00 Qb
          ADD       ECX,bias
          AND       ECX,$FF00FF00       // ECX = Qa 00 Qg 00
          OR        ECX,R9D             // ECX = Qa Qr Qg Qb

    // Z = P + Q (the per-byte sums are assumed not to overflow)
          ADD       EAX,ECX

  @StoreX:
          MOV       [RDX],EAX
  @Leave:
  {$ENDIF}
  end;
  // EMMS_ASM
  //   Counterpart of EMMS_MMX for the non-MMX code path: the body is empty,
  //   so calling it executes no instructions of its own.
  procedure EMMS_ASM; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    // intentionally empty
  end;
  // GenAlphaTable
  //   Allocates AlphaTable and fills a 16-byte aligned lookup table inside it.
  //   alpha_ptr points at the aligned start. Entry I (I = 0..255) is 16 bytes
  //   wide and holds the value I replicated in every 16-bit word, so an alpha
  //   value shifted left by 4 indexes its entry directly.
  //   bias_ptr is set to entry $80 of the same table.
  procedure GenAlphaTable;
  var
    Level: Integer;      // alpha level currently being written
    Lane: Integer;       // longword position within one 16-byte entry
    Pattern: LongWord;   // Level replicated into both words of a longword
    Cursor: PLongWord;   // write position inside the table
  begin
    GetMem(AlphaTable, 257 * 8 * SizeOf(Cardinal));

    // Round the raw block address up to the next 16-byte boundary.
  {$IFDEF HAS_NATIVEINT}
    alpha_ptr := Pointer(NativeUInt(AlphaTable) and (not $F));
    if NativeUInt(alpha_ptr) < NativeUInt(AlphaTable) then
      alpha_ptr := Pointer(NativeUInt(alpha_ptr) + 16);
  {$ELSE}
    alpha_ptr := Pointer(Cardinal(AlphaTable) and (not $F));
    if Cardinal(alpha_ptr) < Cardinal(AlphaTable) then
      Inc(Cardinal(alpha_ptr), 16);
  {$ENDIF}

    Cursor := alpha_ptr;
    for Level := 0 to 255 do
    begin
      Pattern := Level + Level shl 16;
      // four identical longwords make up one 16-byte entry
      for Lane := 1 to 4 do
      begin
        Cursor^ := Pattern;
        Inc(Cursor);
      end;
    end;

    // The bias constant is the table entry for level $80 (4 longwords per entry).
    bias_ptr := alpha_ptr;
    Inc(PLongWord(bias_ptr), 4 * $80);
  end;
  // FreeAlphaTable
  //   Releases the memory block that GenAlphaTable allocated for AlphaTable.
  //   alpha_ptr and bias_ptr point into that block and are not reset here.
  procedure FreeAlphaTable;
  begin
    FreeMem(AlphaTable);
  end;
  1673. {$IFNDEF OMIT_MMX}
  1674. { MMX versions }
  // BlendReg_MMX
  //   Blends foreground F onto background B using the alpha byte of F and
  //   returns the result. All four channels are computed at once as
  //     (B * 256 + bias + Fa * (F - B)) shr 8
  //   with the bias words read through bias_ptr.
  //   MMX state is left active; no EMMS is issued here.
  function BlendReg_MMX(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = F, EDX = B

          MOVD      MM0,EAX             // MM0 = F
          PXOR      MM3,MM3             // MM3 = 0, used for byte -> word unpacking
          MOVD      MM2,EDX             // MM2 = B
          PUNPCKLBW MM0,MM3             // MM0 = F widened to four words
          MOV       ECX,bias_ptr
          PUNPCKLBW MM2,MM3             // MM2 = B widened to four words
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1             // replicate the alpha word ...
          PSUBW     MM0,MM2             // MM0 = F - B
          PUNPCKHDQ MM1,MM1             // ... into all four words of MM1
          PSLLW     MM2,8               // MM2 = B * 256
          PMULLW    MM0,MM1             // MM0 = Fa * (F - B)
          PADDW     MM2,[ECX]           // add the rounding bias
          PADDW     MM2,MM0
          PSRLW     MM2,8               // back to the 0..255 range
          PACKUSWB  MM2,MM3             // repack the words into bytes
          MOVD      EAX,MM2
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = F, EDX = B

          MOVD      MM0,ECX             // MM0 = F
          PXOR      MM3,MM3             // MM3 = 0, used for byte -> word unpacking
          MOVD      MM2,EDX             // MM2 = B
          PUNPCKLBW MM0,MM3             // MM0 = F widened to four words
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr]  // RIP-relative load keeps the code position independent
  {$ENDIF}
          PUNPCKLBW MM2,MM3             // MM2 = B widened to four words
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1             // replicate the alpha word ...
          PSUBW     MM0,MM2             // MM0 = F - B
          PUNPCKHDQ MM1,MM1             // ... into all four words of MM1
          PSLLW     MM2,8               // MM2 = B * 256
          PMULLW    MM0,MM1             // MM0 = Fa * (F - B)
          PADDW     MM2,[RAX]           // add the rounding bias
          PADDW     MM2,MM0
          PSRLW     MM2,8               // back to the 0..255 range
          PACKUSWB  MM2,MM3             // repack the words into bytes
          MOVD      EAX,MM2
  {$ENDIF}
  end;
  1728. {$IFDEF TARGET_x86}
  // BlendMem_MMX (x86 only)
  //   Blends foreground F onto the colour stored in B, in place, using the
  //   alpha byte of F:
  //     Fa = 0    -> B is left unchanged
  //     Fa = 255  -> B := F
  //     otherwise -> B := (B * 256 + bias + Fa * (F - B)) shr 8 per channel
  //   MMX state is left active; no EMMS is issued here.
  procedure BlendMem_MMX(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    // In:  EAX = F, EDX = address of B

          TEST      EAX,$FF000000
          JZ        @Leave              // fully transparent: nothing to do
          CMP       EAX,$FF000000
          JNC       @Overwrite          // fully opaque: plain store

          PXOR      MM3,MM3             // MM3 = 0, used for byte -> word unpacking
          MOVD      MM0,EAX             // MM0 = F
          MOVD      MM2,[EDX]           // MM2 = B
          PUNPCKLBW MM0,MM3
          MOV       ECX,bias_ptr
          PUNPCKLBW MM2,MM3
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1             // replicate the alpha word ...
          PSUBW     MM0,MM2             // MM0 = F - B
          PUNPCKHDQ MM1,MM1             // ... into all four words of MM1
          PSLLW     MM2,8               // MM2 = B * 256
          PMULLW    MM0,MM1             // MM0 = Fa * (F - B)
          PADDW     MM2,[ECX]           // add the rounding bias
          PADDW     MM2,MM0
          PSRLW     MM2,8
          PACKUSWB  MM2,MM3
          MOVD      [EDX],MM2
  @Leave:
          RET

  @Overwrite:
          MOV       [EDX],EAX
  end;
  // BlendRegEx_MMX (x86 only)
  //   Blends foreground F onto background B with an additional master alpha M
  //   and returns the result. The weight is W = (Fa * (M + 1)) shr 8; when it
  //   is zero, B is returned unchanged. Otherwise each channel becomes
  //     (B * 256 + bias + W * (F - B)) shr 8
  //   with W taken from the alpha table (16 bytes per entry).
  //   MMX state is left active; no EMMS is issued here.
  function BlendRegEx_MMX(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    // In:  EAX = F, EDX = B, ECX = M

          PUSH      EBX
          MOV       EBX,EAX
          SHR       EBX,24              // EBX = Fa
          INC       ECX                 // map 0..255 onto 1..256
          IMUL      ECX,EBX
          SHR       ECX,8               // ECX = W
          JZ        @ReturnB            // W = 0: background wins

          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          MOVD      MM1,EAX             // MM1 = F
          SHL       ECX,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,EDX             // MM2 = B
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          ADD       ECX,alpha_ptr       // ECX = address of the entry for W
          PSUBW     MM1,MM2             // MM1 = F - B
          PMULLW    MM1,[ECX]           // MM1 = W * (F - B)
          PSLLW     MM2,8               // MM2 = B * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1

          POP       EBX
          RET

  @ReturnB:
          MOV       EAX,EDX
          POP       EBX
  end;
  1794. {$ENDIF}
  // BlendMemEx_MMX
  //   Blends foreground F onto the colour stored in B, in place, with an
  //   additional master alpha M. The weight is W = (Fa * (M + 1)) shr 8.
  //   B is left unchanged when Fa = 0 or W = 0; otherwise each channel becomes
  //     (B * 256 + bias + W * (F - B)) shr 8
  //   with W taken from the alpha table (16 bytes per entry).
  //   MMX state is left active; no EMMS is issued here.
  procedure BlendMemEx_MMX(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = F, EDX = address of B, ECX = M

          TEST      EAX,$FF000000
          JZ        @Leave              // fully transparent: nothing to do

          PUSH      EBX
          MOV       EBX,EAX
          SHR       EBX,24              // EBX = Fa
          INC       ECX                 // map 0..255 onto 1..256
          IMUL      ECX,EBX
          SHR       ECX,8               // ECX = W
          JZ        @RestoreEBX         // W = 0: nothing to do

          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          MOVD      MM1,EAX             // MM1 = F
          SHL       ECX,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,[EDX]           // MM2 = B
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          ADD       ECX,alpha_ptr       // ECX = address of the entry for W
          PSUBW     MM1,MM2             // MM1 = F - B
          PMULLW    MM1,[ECX]           // MM1 = W * (F - B)
          PSLLW     MM2,8               // MM2 = B * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDX],MM1

  @RestoreEBX:
          POP       EBX
  @Leave:
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = F, RDX = address of B, R8D = M

          TEST      ECX,$FF000000
          JZ        @Leave              // fully transparent: nothing to do

          MOV       EAX,ECX
          SHR       EAX,24              // EAX = Fa
          INC       R8D                 // map 0..255 onto 1..256
          IMUL      R8D,EAX
          SHR       R8D,8               // R8D = W
          JZ        @Leave              // W = 0: nothing to do

          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          MOVD      MM1,ECX             // MM1 = F
          SHL       R8D,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,[RDX]           // MM2 = B
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
  {$IFNDEF FPC}
          ADD       R8,alpha_ptr        // R8 = address of the entry for W
  {$ELSE}
          ADD       R8,[RIP+alpha_ptr]  // R8 = address of the entry for W
  {$ENDIF}
          PSUBW     MM1,MM2             // MM1 = F - B
          PMULLW    MM1,[R8]            // MM1 = W * (F - B)
          PSLLW     MM2,8               // MM2 = B * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr]  // RIP-relative load keeps the code position independent
  {$ENDIF}
          PADDW     MM2,[RAX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [RDX],MM1
  @Leave:
  {$ENDIF}
  end;
  // BlendRegRGB_MMX
  //   Blends F onto B with a separate weight per channel taken from the bytes
  //   of W, and returns the result. Per word lane the computation is
  //     (B * 256 + bias + Wlane * (F - B)) shr 8.
  //   W is byte-swapped (BSWAP) before it is widened, so the weight bytes are
  //   applied to the colour lanes in reversed byte order.
  //   NOTE(review): which byte of W is meant to weight which channel is not
  //   visible from this routine alone - confirm against the callers.
  //   MMX state is left active; no EMMS is issued here.
  function BlendRegRGB_MMX(F, B, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = F, EDX = B, ECX = W

          PXOR      MM2,MM2             // MM2 = 0, used for byte -> word unpacking
          MOVD      MM0,EAX             // MM0 = F
          PUNPCKLBW MM0,MM2
          MOVD      MM1,EDX             // MM1 = B
          PUNPCKLBW MM1,MM2
          BSWAP     ECX                 // reverse the byte order of the weights
          PSUBW     MM0,MM1             // MM0 = F - B
          MOVD      MM3,ECX
          PUNPCKLBW MM3,MM2             // MM3 = weights widened to four words
          PMULLW    MM0,MM3             // MM0 = W * (F - B), lane by lane
          MOV       EAX,bias_ptr
          PSLLW     MM1,8               // MM1 = B * 256
          PADDW     MM1,[EAX]           // add the rounding bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      EAX,MM1
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = F, EDX = B, R8D = W

          PXOR      MM2,MM2             // MM2 = 0, used for byte -> word unpacking
          MOVD      MM0,ECX             // MM0 = F
          PUNPCKLBW MM0,MM2
          MOVD      MM1,EDX             // MM1 = B
          PUNPCKLBW MM1,MM2
          BSWAP     R8D                 // reverse the byte order of the weights
          PSUBW     MM0,MM1             // MM0 = F - B
          MOVD      MM3,R8D
          PUNPCKLBW MM3,MM2             // MM3 = weights widened to four words
          PMULLW    MM0,MM3             // MM0 = W * (F - B), lane by lane
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr]  // RIP-relative load keeps the code position independent
  {$ENDIF}
          PSLLW     MM1,8               // MM1 = B * 256
          PADDW     MM1,[RAX]           // add the rounding bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      EAX,MM1
  {$ENDIF}
  end;
  // BlendMemRGB_MMX
  //   In-place variant of the per-channel weighted blend: blends F onto the
  //   colour stored in B using one weight byte of W per word lane,
  //     B := (B * 256 + bias + Wlane * (F - B)) shr 8.
  //   W is byte-swapped (BSWAP) before it is widened, so the weight bytes are
  //   applied to the colour lanes in reversed byte order.
  //   MMX state is left active; no EMMS is issued here.
  //
  //   Fix: the x64 branch addressed B through [EDX]. In 64-bit mode that is a
  //   32-bit effective address, which discards the upper half of the pointer
  //   passed in RDX and faults (or hits the wrong memory) for any B located
  //   above 4 GB. It now uses [RDX], like the other x64 routines in this unit.
  procedure BlendMemRGB_MMX(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_x86}
    // In:  EAX = F, EDX = address of B, ECX = W

          PXOR      MM2,MM2             // MM2 = 0, used for byte -> word unpacking
          MOVD      MM0,EAX             // MM0 = F
          PUNPCKLBW MM0,MM2
          MOVD      MM1,[EDX]           // MM1 = B
          PUNPCKLBW MM1,MM2
          BSWAP     ECX                 // reverse the byte order of the weights
          PSUBW     MM0,MM1             // MM0 = F - B
          MOVD      MM3,ECX
          PUNPCKLBW MM3,MM2             // MM3 = weights widened to four words
          PMULLW    MM0,MM3             // MM0 = W * (F - B), lane by lane
          MOV       EAX,bias_ptr
          PSLLW     MM1,8               // MM1 = B * 256
          PADDW     MM1,[EAX]           // add the rounding bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      [EDX],MM1
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = F, RDX = address of B, R8D = W

          PXOR      MM2,MM2             // MM2 = 0, used for byte -> word unpacking
          MOVD      MM0,ECX             // MM0 = F
          PUNPCKLBW MM0,MM2
          MOVD      MM1,[RDX]           // MM1 = B (full 64-bit address)
          PUNPCKLBW MM1,MM2
          BSWAP     R8D                 // reverse the byte order of the weights
          PSUBW     MM0,MM1             // MM0 = F - B
          MOVD      MM3,R8D
          PUNPCKLBW MM3,MM2             // MM3 = weights widened to four words
          PMULLW    MM0,MM3             // MM0 = W * (F - B), lane by lane
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr]  // RIP-relative load keeps the code position independent
  {$ENDIF}
          PSLLW     MM1,8               // MM1 = B * 256
          PADDW     MM1,[RAX]           // add the rounding bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      [RDX],MM1           // store through the full 64-bit address
  {$ENDIF}
  end;
  1964. {$IFDEF TARGET_x86}
  // BlendLine_MMX (x86 only)
  //   Alpha-blends Count pixels from Src onto Dst using MMX.
  //   For every source pixel:
  //     Fa = 0    -> destination pixel is left unchanged
  //     Fa = 255  -> source pixel is copied as is
  //     otherwise -> Dst := (B * 256 + bias + Fa * (F - B)) shr 8 per channel
  //   Nothing is done when Count <= 0.
  //   MMX state is left active; no EMMS is issued here.
  //
  //   Fix: the entry guard used JS, which rejects only negative counts. With
  //   Count = 0 the DEC/JNZ loop wrapped around and ran roughly 2^32 times.
  //   JLE rejects zero as well.
  procedure BlendLine_MMX(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    // In:  EAX = Src, EDX = Dst, ECX = Count

          TEST      ECX,ECX
          JLE       @Finish             // Count <= 0: nothing to do

          PUSH      ESI
          PUSH      EDI

          MOV       ESI,EAX             // ESI = Src
          MOV       EDI,EDX             // EDI = Dst

  @PixelLoop:
          MOV       EAX,[ESI]
          TEST      EAX,$FF000000
          JZ        @NextPixel          // fully transparent: skip
          CMP       EAX,$FF000000
          JNC       @Store              // fully opaque: copy without blending

    // blend
          MOVD      MM0,EAX             // MM0 = 00 00 00 00 Fa Fr Fg Fb
          PXOR      MM3,MM3             // MM3 = 0
          MOVD      MM2,[EDI]           // MM2 = 00 00 00 00 Ba Br Bg Bb
          PUNPCKLBW MM0,MM3             // MM0 = 00 Fa 00 Fr 00 Fg 00 Fb
          MOV       EAX,bias_ptr
          PUNPCKLBW MM2,MM3             // MM2 = 00 Ba 00 Br 00 Bg 00 Bb
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1             // MM1 = 00 Fa 00 Fa 00 ** 00 **
          PSUBW     MM0,MM2             // MM0 = F - B per channel
          PUNPCKHDQ MM1,MM1             // MM1 = 00 Fa 00 Fa 00 Fa 00 Fa
          PSLLW     MM2,8               // MM2 = B * 256
          PMULLW    MM0,MM1             // MM0 = Fa * (F - B)
          PADDW     MM2,[EAX]           // add the rounding bias
          PADDW     MM2,MM0
          PSRLW     MM2,8               // MM2 = 00 Qa 00 Qr 00 Qg 00 Qb
          PACKUSWB  MM2,MM3             // MM2 = 00 00 00 00 Qa Qr Qg Qb
          MOVD      EAX,MM2

  @Store:
          MOV       [EDI],EAX
  @NextPixel:
          ADD       ESI,4
          ADD       EDI,4

          DEC       ECX
          JNZ       @PixelLoop

          POP       EDI
          POP       ESI
  @Finish:
  end;
  // BlendLineEx_MMX (x86 only)
  //   Alpha-blends Count pixels from Src onto Dst with an additional master
  //   alpha M. The weight per pixel is W = ((Fa + 1) * M) shr 8. Pixels with
  //   Fa = 0 or W = 0 leave the destination unchanged; otherwise
  //     Dst := (B * 256 + bias + W * (F - B)) shr 8 per channel,
  //   with W taken from the alpha table (16 bytes per entry).
  //   Nothing is done when Count <= 0.
  //   MMX state is left active; no EMMS is issued here.
  //
  //   Fix: the entry guard used JS, which rejects only negative counts. With
  //   Count = 0 the DEC/JNZ loop wrapped around and ran roughly 2^32 times.
  //   JLE rejects zero as well.
  procedure BlendLineEx_MMX(Src, Dst: PColor32; Count: Integer; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    // In:  EAX = Src, EDX = Dst, ECX = Count, M = stack parameter

          TEST      ECX,ECX
          JLE       @Finish             // Count <= 0: nothing to do

          PUSH      ESI
          PUSH      EDI
          PUSH      EBX

          MOV       ESI,EAX             // ESI = Src
          MOV       EDI,EDX             // EDI = Dst
          MOV       EDX,M               // EDX = master alpha

  @PixelLoop:
          MOV       EAX,[ESI]
          TEST      EAX,$FF000000
          JZ        @NextPixel          // fully transparent: skip

          MOV       EBX,EAX
          SHR       EBX,24              // EBX = Fa
          INC       EBX                 // map 0..255 onto 1..256
          IMUL      EBX,EDX
          SHR       EBX,8               // EBX = W
          JZ        @NextPixel          // W = 0: skip

    // blend
          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          MOVD      MM1,EAX             // MM1 = F
          SHL       EBX,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,[EDI]           // MM2 = B
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          ADD       EBX,alpha_ptr       // EBX = address of the entry for W
          PSUBW     MM1,MM2             // MM1 = F - B
          PMULLW    MM1,[EBX]           // MM1 = W * (F - B)
          PSLLW     MM2,8               // MM2 = B * 256
          MOV       EBX,bias_ptr
          PADDW     MM2,[EBX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1

          MOV       [EDI],EAX
  @NextPixel:
          ADD       ESI,4
          ADD       EDI,4

          DEC       ECX
          JNZ       @PixelLoop

          POP       EBX
          POP       EDI
          POP       ESI
  @Finish:
  end;
  2063. {$ENDIF}
  // CombineReg_MMX
  //   Returns the weighted combination of the colours X and Y, where W is the
  //   weight of X. Per channel (alpha included):
  //     (Y * 256 + bias + W * (X - Y)) shr 8
  //   W indexes the alpha table directly (16 bytes per entry), so it must be
  //   within the table's range.
  //   MMX state is left active; no EMMS is issued here.
  function CombineReg_MMX(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_X86}
    // In:  EAX = X, EDX = Y, ECX = W

          MOVD      MM1,EAX             // MM1 = X
          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          SHL       ECX,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,EDX             // MM2 = Y
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          ADD       ECX,alpha_ptr       // ECX = address of the entry for W
          PSUBW     MM1,MM2             // MM1 = X - Y
          PMULLW    MM1,[ECX]           // MM1 = W * (X - Y)
          PSLLW     MM2,8               // MM2 = Y * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // In:  ECX = X, EDX = Y, R8D = W

          MOVD      MM1,ECX             // MM1 = X
          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          SHL       R8D,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,EDX             // MM2 = Y
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
  {$IFNDEF FPC}
          ADD       R8,alpha_ptr        // R8 = address of the entry for W
  {$ELSE}
          ADD       R8,[RIP+alpha_ptr]  // R8 = address of the entry for W
  {$ENDIF}
          PSUBW     MM1,MM2             // MM1 = X - Y
          PMULLW    MM1,[R8]            // MM1 = W * (X - Y)
          PSLLW     MM2,8               // MM2 = Y * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr]  // RIP-relative load keeps the code position independent
  {$ENDIF}
          PADDW     MM2,[RAX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1
  {$ENDIF}
  end;
  // CombineMem_MMX
  //   In-place weighted combine: B := W * F + (255 - W) * B, computed per
  //   channel (alpha included) as (B * 256 + bias + W * (F - B)) shr 8.
  //   W = 0 leaves B untouched; W = $FF stores F unchanged.
  //   MMX state is left active; no EMMS is issued here.
  //
  //   Fix: the x64 "W = $FF" path stored RCX, i.e. eight bytes, into the
  //   four-byte TColor32 at [RDX]. That overwrote the four bytes following B
  //   (typically the next pixel) with the upper half of RCX. It now stores ECX.
  procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$IFDEF TARGET_X86}
    // In:  EAX = F, EDX = address of B, ECX = W

          JCXZ      @Leave              // W = 0: write nothing
          CMP       ECX,$FF
          JZ        @StoreF             // W = $FF: plain store

          MOVD      MM1,EAX             // MM1 = F
          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          SHL       ECX,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,[EDX]           // MM2 = B
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          ADD       ECX,alpha_ptr       // ECX = address of the entry for W
          PSUBW     MM1,MM2             // MM1 = F - B
          PMULLW    MM1,[ECX]           // MM1 = W * (F - B)
          PSLLW     MM2,8               // MM2 = B * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDX],MM1
  @Leave:
          RET

  @StoreF:
          MOV       [EDX],EAX
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // In:  ECX = F, RDX = address of B, R8D = W

          TEST      R8D,R8D
          JZ        @Leave              // W = 0: write nothing
          CMP       R8D,$FF
          JZ        @StoreF             // W = $FF: plain store

          MOVD      MM1,ECX             // MM1 = F
          PXOR      MM0,MM0             // MM0 = 0, used for byte -> word unpacking
          SHL       R8D,4               // W * 16 = byte offset of the table entry
          MOVD      MM2,[RDX]           // MM2 = B
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
  {$IFNDEF FPC}
          ADD       R8,alpha_ptr        // R8 = address of the entry for W
  {$ELSE}
          ADD       R8,[RIP+alpha_ptr]  // R8 = address of the entry for W
  {$ENDIF}
          PSUBW     MM1,MM2             // MM1 = F - B
          PMULLW    MM1,[R8]            // MM1 = W * (F - B)
          PSLLW     MM2,8               // MM2 = B * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr]  // RIP-relative load keeps the code position independent
  {$ENDIF}
          PADDW     MM2,[RAX]           // add the rounding bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [RDX],MM1
  @Leave:
          RET

  @StoreF:
          MOV       [RDX],ECX           // 32-bit store: B is a single TColor32
  {$ENDIF}
  end;
  {$IFDEF TARGET_x86}
  procedure CombineLine_MMX(Src, Dst: PColor32; Count: Integer; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Combines Count pixels from Src into Dst with a constant weight W [0..255]:
  //   Dst[i] := W * (Src[i] - Dst[i]) + Dst[i]   (per 8-bit channel)
  // Count <= 0 is a no-op, W = 0 leaves Dst untouched and W = $FF copies Src to
  // Dst through GR32_LowLevel.MoveLongword.
  // Every path leaves through the end of the routine, so the compiler-generated
  // exit code is the only place where the stack is unwound and the stack
  // parameter W is released.
  asm
    // EAX <- Src
    // EDX <- Dst
    // ECX <- Count
    // W is passed on the stack
          TEST      ECX,ECX
          JLE       @Done           // Count <= 0 => nothing to do

          PUSH      EBX
          MOV       EBX,W
          TEST      EBX,EBX
          JZ        @Restore        // weight is zero => Dst stays as is
          CMP       EBX,$FF
          JZ        @Copy           // weight = 255 => copy src to dst

          SHL       EBX,4           // W * 16 = byte offset of the table entry for W
          ADD       EBX,alpha_ptr
          MOVQ      MM3,[EBX]       // MM3 <- multiplier words for W
          MOV       EBX,bias_ptr
          MOVQ      MM4,[EBX]       // MM4 <- bias words

    // loop start
  @Loop:  MOVD      MM1,[EAX]
          PXOR      MM0,MM0
          MOVD      MM2,[EDX]
          PUNPCKLBW MM1,MM0         // widen Src channels to words
          PUNPCKLBW MM2,MM0         // widen Dst channels to words
          PSUBW     MM1,MM2         // Src - Dst
          PMULLW    MM1,MM3
          PSLLW     MM2,8           // Dst * 256
          PADDW     MM2,MM4
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDX],MM1
          ADD       EAX,4
          ADD       EDX,4
          DEC       ECX
          JNZ       @Loop
          JMP       @Restore

    // EAX = Src, EDX = Dst, ECX = Count are still in place for the call
  @Copy:  CALL      GR32_LowLevel.MoveLongword
  @Restore:
          POP       EBX
  @Done:
  end;
  {$ENDIF}
  procedure EMMS_MMX; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Executes the EMMS instruction, which marks the x87 register stack as empty
  // again after MMX instructions have been used.
  asm
  EMMS
  end;
  function LightenReg_MMX(C: TColor32; Amount: Integer): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Adds Amount to (Amount >= 0) or subtracts -Amount from (Amount < 0) the
  // three low bytes of C using unsigned saturating byte arithmetic.
  // Multiplying by $010101 replicates the amount into bytes 0..2 only, so the
  // top byte of C is not changed as long as the magnitude of Amount fits in one
  // byte. NOTE(review): larger magnitudes carry into neighbouring bytes -
  // callers presumably pass -255..255; confirm.
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C
  // EDX <- Amount
  MOVD MM0,EAX
  TEST EDX,EDX
  // negative amount => darken
  JL @1
  IMUL EDX,$010101
  MOVD MM1,EDX
  // saturating add per byte
  PADDUSB MM0,MM1
  MOVD EAX,MM0
  RET
  @1: NEG EDX
  IMUL EDX,$010101
  MOVD MM1,EDX
  // saturating subtract per byte
  PSUBUSB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C
  // EDX <- Amount
  MOVD MM0,ECX
  TEST EDX,EDX
  // negative amount => darken
  JL @1
  IMUL EDX,$010101
  MOVD MM1,EDX
  // saturating add per byte
  PADDUSB MM0,MM1
  MOVD EAX,MM0
  RET
  @1: NEG EDX
  IMUL EDX,$010101
  MOVD MM1,EDX
  // saturating subtract per byte
  PSUBUSB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  end;
  2267. { MMX Color algebra versions }
  function ColorAdd_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns C1 + C2 computed per byte with unsigned saturation (each of the
  // four bytes is clamped at $FF).
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  MOVD MM0,EAX
  MOVD MM1,EDX
  PADDUSB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  MOVD MM0,ECX
  MOVD MM1,EDX
  PADDUSB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorSub_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns C1 - C2 computed per byte with unsigned saturation (each of the
  // four bytes is clamped at 0).
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  MOVD MM0,EAX
  MOVD MM1,EDX
  PSUBUSB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  MOVD MM0,ECX
  MOVD MM1,EDX
  PSUBUSB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorModulate_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte product of C1 and C2 scaled back to 8 bits:
  //   Result := (C1 * C2) shr 8   for each of the four bytes.
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  PXOR MM2,MM2
  MOVD MM0,EAX
  // widen bytes to words so the 16-bit products fit
  PUNPCKLBW MM0,MM2
  MOVD MM1,EDX
  PUNPCKLBW MM1,MM2
  PMULLW MM0,MM1
  PSRLW MM0,8
  PACKUSWB MM0,MM2
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  PXOR MM2,MM2
  MOVD MM0,ECX
  // widen bytes to words so the 16-bit products fit
  PUNPCKLBW MM0,MM2
  MOVD MM1,EDX
  PUNPCKLBW MM1,MM2
  PMULLW MM0,MM1
  PSRLW MM0,8
  PACKUSWB MM0,MM2
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorMax_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte unsigned maximum of C1 and C2 (PMAXUB on MMX
  // registers).
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  MOVD MM0,EAX
  MOVD MM1,EDX
  PMAXUB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  MOVD MM0,ECX
  MOVD MM1,EDX
  PMAXUB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorMin_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte unsigned minimum of C1 and C2 (PMINUB on MMX
  // registers).
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  MOVD MM0,EAX
  MOVD MM1,EDX
  PMINUB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  MOVD MM0,ECX
  MOVD MM1,EDX
  PMINUB MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorDifference_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte absolute difference |C1 - C2|.
  // Both saturating differences (C1 - C2 and C2 - C1) are computed; for each
  // byte at most one of them is non-zero, so OR-ing them yields the absolute
  // value.
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  MOVD MM0,EAX
  MOVD MM1,EDX
  MOVQ MM2,MM0
  // MM0 <- max(C1 - C2, 0)
  PSUBUSB MM0,MM1
  // MM1 <- max(C2 - C1, 0)
  PSUBUSB MM1,MM2
  POR MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  MOVD MM0,ECX
  MOVD MM1,EDX
  MOVQ MM2,MM0
  // MM0 <- max(C1 - C2, 0)
  PSUBUSB MM0,MM1
  // MM1 <- max(C2 - C1, 0)
  PSUBUSB MM1,MM2
  POR MM0,MM1
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorExclusion_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns, for each of the four bytes,
  //   Result := C1 + C2 - ((C1 * C2) shr 7)
  // where the subtraction saturates at 0 and the final pack saturates at $FF.
  asm
  {$IFDEF TARGET_X86}
  // EAX <- C1, EDX <- C2
  PXOR MM2,MM2
  MOVD MM0,EAX
  PUNPCKLBW MM0,MM2
  MOVD MM1,EDX
  PUNPCKLBW MM1,MM2
  // keep a copy of C1 for the product
  MOVQ MM3,MM0
  // MM0 <- C1 + C2
  PADDW MM0,MM1
  // MM1 <- (C1 * C2) shr 7
  PMULLW MM1,MM3
  PSRLW MM1,7
  PSUBUSW MM0,MM1
  PACKUSWB MM0,MM2
  MOVD EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX <- C1, EDX <- C2
  PXOR MM2,MM2
  MOVD MM0,ECX
  PUNPCKLBW MM0,MM2
  MOVD MM1,EDX
  PUNPCKLBW MM1,MM2
  // keep a copy of C1 for the product
  MOVQ MM3,MM0
  // MM0 <- C1 + C2
  PADDW MM0,MM1
  // MM1 <- (C1 * C2) shr 7
  PMULLW MM1,MM3
  PSRLW MM1,7
  PSUBUSW MM0,MM1
  PACKUSWB MM0,MM2
  MOVD EAX,MM0
  {$ENDIF}
  end;
  function ColorScale_MMX(C, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Scales every byte of C by the weight W:
  //   Result := (C * T[W]) shr 8   (per 8-bit channel)
  // where T[W] is the 16-byte table entry at alpha_ptr + W * 16.
  // NOTE(review): W is presumably limited to 0..255 by the size of that table;
  // confirm against the table setup code.
  asm
  {$IFDEF TARGET_X86}
    // EAX <- C
    // EDX <- W
          PXOR      MM2,MM2
          SHL       EDX,4           // W * 16 = byte offset of the table entry
          MOVD      MM0,EAX
          PUNPCKLBW MM0,MM2         // widen the channels of C to words
          ADD       EDX,alpha_ptr
          PMULLW    MM0,[EDX]
          PSRLW     MM0,8
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // ECX <- C
    // EDX <- W
          PXOR      MM2,MM2
    // W is a 32-bit argument, so only EDX is defined. The 32-bit shift also
    // zero-extends into RDX, which keeps undefined upper bits out of the
    // address computed below.
          SHL       EDX,4
          MOVD      MM0,ECX
          PUNPCKLBW MM0,MM2
  {$IFNDEF FPC}
          ADD       RDX,alpha_ptr
  {$ELSE}
          ADD       RDX,[RIP+alpha_ptr]
  {$ENDIF}
          PMULLW    MM0,[RDX]
          PSRLW     MM0,8
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  2434. {$ENDIF}
  2435. { SSE2 versions }
  2436. {$IFNDEF OMIT_SSE2}
  function BlendReg_SSE2(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns F blended over B using the alpha byte of F as the weight.
  // Each of the four bytes is computed as
  //   (Fa * (F - B) + B * 256 + bias) shr 8
  // with the bias words read through bias_ptr.
  asm
  // blend foreground color (F) to a background color (B),
  // using alpha channel value of F
  // Result := Fa * (Frgb - Brgb) + Brgb
  {$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  MOVD XMM0,EAX
  PXOR XMM3,XMM3
  MOVD XMM2,EDX
  PUNPCKLBW XMM0,XMM3
  MOV ECX,bias_ptr
  PUNPCKLBW XMM2,XMM3
  MOVQ XMM1,XMM0
  // broadcast word 3 (the alpha of F) into the four low words
  PSHUFLW XMM1,XMM1, $FF
  PSUBW XMM0,XMM2
  PSLLW XMM2,8
  PMULLW XMM0,XMM1
  PADDW XMM2,[ECX]
  PADDW XMM2,XMM0
  PSRLW XMM2,8
  PACKUSWB XMM2,XMM3
  MOVD EAX,XMM2
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  MOVD XMM0,ECX
  PXOR XMM3,XMM3
  MOVD XMM2,EDX
  PUNPCKLBW XMM0,XMM3
  {$IFNDEF FPC}
  MOV RAX,bias_ptr
  {$ELSE}
  MOV RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
  PUNPCKLBW XMM2,XMM3
  MOVQ XMM1,XMM0
  // broadcast word 3 (the alpha of F) into the four low words
  PSHUFLW XMM1,XMM1, $FF
  PSUBW XMM0,XMM2
  PSLLW XMM2,8
  PMULLW XMM0,XMM1
  PADDW XMM2,[RAX]
  PADDW XMM2,XMM0
  PSRLW XMM2,8
  PACKUSWB XMM2,XMM3
  MOVD EAX,XMM2
  {$ENDIF}
  end;
  procedure BlendMem_SSE2(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends F over the pixel B in place using the alpha byte of F as the weight:
  //   B := (Fa * (F - B) + B * 256 + bias) shr 8   (per 8-bit channel)
  // An alpha of 0 leaves B untouched; an alpha of $FF stores F unchanged.
  asm
  {$IFDEF TARGET_x86}
  // EAX - Color X (F)
  // [EDX] - Color Y (B)
  // Result := W * (X - Y) + Y
  TEST EAX,$FF000000
  // alpha = 0 => nothing to do
  JZ @1
  CMP EAX,$FF000000
  // alpha = $FF => plain copy
  JNC @2
  PXOR XMM3,XMM3
  MOVD XMM0,EAX
  MOVD XMM2,[EDX]
  PUNPCKLBW XMM0,XMM3
  MOV ECX,bias_ptr
  PUNPCKLBW XMM2,XMM3
  MOVQ XMM1,XMM0
  // broadcast the alpha of F into the four low words
  PSHUFLW XMM1,XMM1, $FF
  PSUBW XMM0,XMM2
  PSLLW XMM2,8
  PMULLW XMM0,XMM1
  PADDW XMM2,[ECX]
  PADDW XMM2,XMM0
  PSRLW XMM2,8
  PACKUSWB XMM2,XMM3
  MOVD [EDX],XMM2
  @1: RET
  @2: MOV [EDX], EAX
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // ECX - Color X (F)
  // [RDX] - Color Y (B)
  // Result := W * (X - Y) + Y
  TEST ECX,$FF000000
  // alpha = 0 => nothing to do
  JZ @1
  CMP ECX,$FF000000
  // alpha = $FF => plain copy
  JNC @2
  PXOR XMM3,XMM3
  MOVD XMM0,ECX
  MOVD XMM2,[RDX]
  PUNPCKLBW XMM0,XMM3
  {$IFNDEF FPC}
  MOV RAX,bias_ptr
  {$ELSE}
  MOV RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
  PUNPCKLBW XMM2,XMM3
  MOVQ XMM1,XMM0
  // broadcast the alpha of F into the four low words
  PSHUFLW XMM1,XMM1, $FF
  PSUBW XMM0,XMM2
  PSLLW XMM2,8
  PMULLW XMM0,XMM1
  PADDW XMM2,[RAX]
  PADDW XMM2,XMM0
  PSRLW XMM2,8
  PACKUSWB XMM2,XMM3
  MOVD [RDX],XMM2
  @1: RET
  @2: MOV [RDX], ECX
  {$ENDIF}
  end;
  procedure BlendMems_SSE2(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends the single color F over Count consecutive pixels starting at B,
  // using the alpha byte of F as the weight:
  //   B[i] := (Fa * (F - B[i]) + B[i] * 256 + bias) shr 8   (per 8-bit channel)
  // Count <= 0 or an alpha of 0 is a no-op; an alpha of $FF fills the run
  // with F.
  asm
  {$IFDEF TARGET_x86}
    // EAX <- F
    // EDX <- B
    // ECX <- Count
          TEST      ECX,ECX
          JLE       @Done           // Count <= 0 => nothing to do

          TEST      EAX,$FF000000
          JZ        @Done           // fully transparent => nothing to do

          PUSH      EBX

          MOV       EBX,EAX
          SHR       EBX,24          // EBX <- alpha of F

          CMP       EBX,$FF
          JZ        @CopyPixel      // fully opaque => plain fill

          MOVD      XMM4,EAX
          PXOR      XMM3,XMM3
          PUNPCKLBW XMM4,XMM3       // XMM4 <- channels of F as words
          MOV       EBX,bias_ptr

  @LoopStart:
          MOVD      XMM2,[EDX]
          PUNPCKLBW XMM2,XMM3       // XMM2 <- channels of B as words
    // The next unpack sequence leaves the alpha of F in the four low words
    // of XMM1.
          MOVQ      XMM1,XMM4
          PUNPCKLBW XMM1,XMM3
          PUNPCKHWD XMM1,XMM1
          MOVQ      XMM0,XMM4
          PSUBW     XMM0,XMM2       // F - B
          PUNPCKHDQ XMM1,XMM1
          PSLLW     XMM2,8          // B * 256
          PMULLW    XMM0,XMM1       // Fa * (F - B)
          PADDW     XMM2,[EBX]      // + bias
          PADDW     XMM2,XMM0
          PSRLW     XMM2,8
          PACKUSWB  XMM2,XMM3
          MOVD      [EDX],XMM2

  @NextPixel:
          ADD       EDX,4
          DEC       ECX
          JNZ       @LoopStart

          POP       EBX

  @Done:
          RET

  @CopyPixel:
          MOV       [EDX],EAX
          ADD       EDX,4
          DEC       ECX
          JNZ       @CopyPixel

          POP       EBX
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // ECX <- F
    // RDX <- B
    // R8D <- Count
          TEST      R8D,R8D
          JLE       @Done           // Count <= 0 => nothing to do

          TEST      ECX,$FF000000
          JZ        @Done           // fully transparent => nothing to do

          MOV       RAX,RCX
          SHR       EAX,24          // EAX <- alpha of F

          CMP       EAX,$FF
          JZ        @CopyPixel      // fully opaque => plain fill

          MOVD      XMM4,ECX
          PXOR      XMM3,XMM3
          PUNPCKLBW XMM4,XMM3       // XMM4 <- channels of F as words
    // Load the bias table pointer the same way the other x64 routines of this
    // unit do (RIP-relative under FPC).
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}

  @LoopStart:
          MOVD      XMM2,[RDX]
          PUNPCKLBW XMM2,XMM3       // XMM2 <- channels of B as words
    // The next unpack sequence leaves the alpha of F in the four low words
    // of XMM1.
          MOVQ      XMM1,XMM4
          PUNPCKLBW XMM1,XMM3
          PUNPCKHWD XMM1,XMM1
          MOVQ      XMM0,XMM4
          PSUBW     XMM0,XMM2       // F - B
          PUNPCKHDQ XMM1,XMM1
          PSLLW     XMM2,8          // B * 256
          PMULLW    XMM0,XMM1       // Fa * (F - B)
          PADDW     XMM2,[RAX]      // + bias
          PADDW     XMM2,XMM0
          PSRLW     XMM2,8
          PACKUSWB  XMM2,XMM3
          MOVD      [RDX], XMM2

  @NextPixel:
          ADD       RDX,4
          DEC       R8D
          JNZ       @LoopStart

  @Done:
          RET

  @CopyPixel:
          MOV       [RDX],ECX
          ADD       RDX,4
          DEC       R8D
          JNZ       @CopyPixel
  {$ENDIF}
  end;
  function BlendRegEx_SSE2(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns F blended over B, with the alpha of F additionally scaled by the
  // master alpha M. The effective weight is
  //   Wt := ((M + 1) * Fa) shr 8
  // and the result is (T[Wt] * (F - B) + B * 256 + bias) shr 8 per channel,
  // where T[Wt] is the 16-byte table entry at alpha_ptr + Wt * 16.
  // A weight of 0 returns B unchanged.
  asm
  // blend foreground color (F) to a background color (B),
  // using alpha channel value of F
  // Result := M * Fa * (Frgb - Brgb) + Brgb
  {$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- M
  PUSH EBX
  MOV EBX,EAX
  SHR EBX,24
  INC ECX // 255:256 range bias
  IMUL ECX,EBX
  SHR ECX,8
  // combined weight is zero => result is B
  JZ @1
  PXOR XMM0,XMM0
  MOVD XMM1,EAX
  // weight * 16 = byte offset of the table entry
  SHL ECX,4
  MOVD XMM2,EDX
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  ADD ECX,alpha_ptr
  PSUBW XMM1,XMM2
  PMULLW XMM1,[ECX]
  PSLLW XMM2,8
  MOV ECX,bias_ptr
  PADDW XMM2,[ECX]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD EAX,XMM1
  POP EBX
  RET
  @1: MOV EAX,EDX
  POP EBX
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  // R8D <- M
  MOV EAX,ECX
  SHR EAX,24
  INC R8D // 255:256 range bias
  IMUL R8D,EAX
  SHR R8D,8
  // combined weight is zero => result is B
  JZ @1
  PXOR XMM0,XMM0
  MOVD XMM1,ECX
  // weight * 16 = byte offset of the table entry
  SHL R8D,4
  MOVD XMM2,EDX
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  {$IFNDEF FPC}
  ADD R8,alpha_ptr
  {$ELSE}
  ADD R8,[RIP+alpha_ptr]
  {$ENDIF}
  PSUBW XMM1,XMM2
  PMULLW XMM1,[R8]
  PSLLW XMM2,8
  {$IFNDEF FPC}
  MOV R8,bias_ptr
  {$ELSE}
  MOV R8,[RIP+bias_ptr]
  {$ENDIF}
  PADDW XMM2,[R8]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD EAX,XMM1
  RET
  @1: MOV EAX,EDX
  {$ENDIF}
  end;
  procedure BlendMemEx_SSE2(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends F over the pixel B in place, with the alpha of F additionally scaled
  // by the master alpha M. The effective weight is
  //   Wt := ((M + 1) * Fa) shr 8
  // and B becomes (T[Wt] * (F - B) + B * 256 + bias) shr 8 per channel, where
  // T[Wt] is the 16-byte table entry at alpha_ptr + Wt * 16.
  // B is left untouched when the alpha of F or the combined weight is 0.
  asm
  {$IFDEF TARGET_x86}
  // blend foreground color (F) to a background color (B),
  // using alpha channel value of F
  // EAX <- F
  // [EDX] <- B
  // ECX <- M
  // Result := M * Fa * (Frgb - Brgb) + Brgb
  TEST EAX,$FF000000
  // fully transparent => nothing to do (EBX has not been pushed yet)
  JZ @2
  PUSH EBX
  MOV EBX,EAX
  SHR EBX,24
  INC ECX // 255:256 range bias
  IMUL ECX,EBX
  SHR ECX,8
  // combined weight is zero => nothing to do
  JZ @1
  PXOR XMM0,XMM0
  MOVD XMM1,EAX
  // weight * 16 = byte offset of the table entry
  SHL ECX,4
  MOVD XMM2,[EDX]
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  ADD ECX,alpha_ptr
  PSUBW XMM1,XMM2
  PMULLW XMM1,[ECX]
  PSLLW XMM2,8
  MOV ECX,bias_ptr
  PADDW XMM2,[ECX]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD [EDX],XMM1
  @1:
  POP EBX
  @2:
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // blend foreground color (F) to a background color (B),
  // using alpha channel value of F
  // RCX <- F
  // [RDX] <- B
  // R8 <- M
  // Result := M * Fa * (Frgb - Brgb) + Brgb
  TEST ECX, $FF000000
  // fully transparent => nothing to do
  JZ @1
  MOV R9D,ECX
  SHR R9D,24
  INC R8D // 255:256 range bias
  IMUL R8D,R9D
  SHR R8D,8
  // combined weight is zero => nothing to do
  JZ @1
  PXOR XMM0,XMM0
  MOVD XMM1,ECX
  // weight * 16 = byte offset of the table entry
  SHL R8D,4
  MOVD XMM2,[RDX]
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  {$IFNDEF FPC}
  ADD R8,alpha_ptr
  {$ELSE}
  ADD R8,[RIP+alpha_ptr]
  {$ENDIF}
  PSUBW XMM1,XMM2
  PMULLW XMM1,[R8]
  PSLLW XMM2,8
  {$IFNDEF FPC}
  MOV R8,bias_ptr
  {$ELSE}
  MOV R8,[RIP+bias_ptr]
  {$ENDIF}
  PADDW XMM2,[R8]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD DWORD PTR [RDX],XMM1
  @1:
  {$ENDIF}
  end;
  function BlendRegRGB_SSE2(F, B, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns F blended over B with an individual weight per byte taken from W:
  //   Result := (Wt * (F - B) + B * 256 + bias) shr 8   (per 8-bit byte)
  // BSWAP reverses the byte order of W first, so byte i of the colors is
  // weighted by byte (3 - i) of W.
  asm
  {$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- W
  PXOR XMM2,XMM2
  MOVD XMM0,EAX
  PUNPCKLBW XMM0,XMM2
  MOVD XMM1,EDX
  PUNPCKLBW XMM1,XMM2
  // reverse the byte order of the weights
  BSWAP ECX
  PSUBW XMM0,XMM1
  MOVD XMM3,ECX
  PUNPCKLBW XMM3,XMM2
  PMULLW XMM0,XMM3
  MOV EAX,bias_ptr
  PSLLW XMM1,8
  PADDW XMM1,[EAX]
  PADDW XMM1,XMM0
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM2
  MOVD EAX,XMM1
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  // R8D <- W
  PXOR XMM2,XMM2
  MOVD XMM0,ECX
  PUNPCKLBW XMM0,XMM2
  MOVD XMM1,EDX
  PUNPCKLBW XMM1,XMM2
  // reverse the byte order of the weights
  BSWAP R8D
  PSUBW XMM0,XMM1
  MOVD XMM3,R8D
  PUNPCKLBW XMM3,XMM2
  PMULLW XMM0,XMM3
  {$IFNDEF FPC}
  MOV RAX,bias_ptr
  {$ELSE}
  MOV RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
  PSLLW XMM1,8
  PADDW XMM1,[RAX]
  PADDW XMM1,XMM0
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM2
  MOVD EAX,XMM1
  {$ENDIF}
  end;
  procedure BlendMemRGB_SSE2(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends F over the pixel B in place with an individual weight per byte taken
  // from W. In both variants byte i of the colors is weighted by byte (3 - i)
  // of W (BSWAP on x86, PSHUFLW with $1B on x64).
  // The two variants evaluate the blend differently:
  //   x86: B := (Wt * (F - B) + B * 256 + bias) shr 8
  //   x64: B := ((F * Wt + bias) shr 8) + B - ((B * Wt + bias) shr 8)
  asm
  {$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B
  // ECX <- W
  PXOR XMM2,XMM2
  MOVD XMM0,EAX
  PUNPCKLBW XMM0,XMM2
  MOVD XMM1,[EDX]
  PUNPCKLBW XMM1,XMM2
  // reverse the byte order of the weights
  BSWAP ECX
  PSUBW XMM0,XMM1
  MOVD XMM3,ECX
  PUNPCKLBW XMM3,XMM2
  PMULLW XMM0,XMM3
  MOV EAX,bias_ptr
  PSLLW XMM1,8
  PADDW XMM1,[EAX]
  PADDW XMM1,XMM0
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM2
  MOVD [EDX],XMM1
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B
  // R8D <- W
  MOVD XMM1,R8D
  PXOR XMM4,XMM4
  {$IFNDEF FPC}
  MOV RAX,bias_ptr
  {$ELSE}
  MOV RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
  // XMM5 <- bias words
  MOVQ XMM5,[RAX]
  MOVD XMM0,ECX
  MOVD XMM2,[RDX]
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM1,XMM4
  PUNPCKLBW XMM2,XMM4
  // reverse the order of the four weight words
  PSHUFLW XMM1,XMM1,$1B
  // C = wA + B - wB
  PMULLW XMM0,XMM1
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  PADDW XMM0,XMM2
  PMULLW XMM2,XMM1
  PADDW XMM2,XMM5
  PSRLW XMM2,8
  PSUBW XMM0,XMM2
  PACKUSWB XMM0,XMM4
  MOVD [RDX],XMM0
  {$ENDIF}
  end;
  {$IFDEF TEST_BLENDMEMRGB128SSE4}
  procedure BlendMemRGB128_SSE4(F: TColor32; var B: TColor32; W: UInt64); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Two-pixel variant of the per-byte weighted blend, compiled only when
  // TEST_BLENDMEMRGB128SSE4 is defined.
  // F is duplicated into two pixels, eight bytes are read from and written back
  // to the address of B (B and the pixel that follows it), and W carries eight
  // weight bytes. For each pixel the order of the four weights is reversed
  // (PSHUFLW / PSHUFHW with $1B) before use. Per byte:
  //   B := ((F * Wt + bias) shr 8) + B - ((B * Wt + bias) shr 8)
  // PINSRD is an SSE4.1 instruction.
  asm
  {$IFDEF TARGET_x86}
  // EAX <- F
  // [EDX] <- B (two pixels)
  // W is passed on the stack
  MOVQ XMM1,W
  PXOR XMM4,XMM4
  MOV ECX,[bias_ptr]
  // XMM5 <- bias words (aligned 16-byte load)
  MOVDQA XMM5,[ECX]
  MOVD XMM0,EAX
  // duplicate F into the second pixel slot
  PINSRD XMM0,EAX,1
  MOVQ XMM2,[EDX].QWORD
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM1,XMM4
  PUNPCKLBW XMM2,XMM4
  PSHUFLW XMM1,XMM1,$1B
  PSHUFHW XMM1,XMM1,$1B
  // C = wA + B - wB
  PMULLW XMM0,XMM1
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  PADDW XMM0,XMM2
  PMULLW XMM2,XMM1
  PADDW XMM2,XMM5
  PSRLW XMM2,8
  PSUBW XMM0,XMM2
  PACKUSWB XMM0,XMM4
  MOVQ [EDX].QWORD,XMM0
  {$ENDIF}
  {$IFDEF TARGET_x64}
  // ECX <- F
  // [RDX] <- B (two pixels)
  // R8 <- W
  MOVQ XMM1,R8
  PXOR XMM4,XMM4
  MOV RAX,[RIP+bias_ptr]
  // XMM5 <- bias words (aligned 16-byte load)
  MOVDQA XMM5,[RAX]
  MOVD XMM0,ECX
  // duplicate F into the second pixel slot
  PINSRD XMM0,ECX,1
  MOVQ XMM2,[RDX].QWORD
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM1,XMM4
  PUNPCKLBW XMM2,XMM4
  PSHUFLW XMM1,XMM1,$1B
  PSHUFHW XMM1,XMM1,$1B
  // C = wA + B - wB
  PMULLW XMM0,XMM1
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  PADDW XMM0,XMM2
  PMULLW XMM2,XMM1
  PADDW XMM2,XMM5
  PSRLW XMM2,8
  PSUBW XMM0,XMM2
  PACKUSWB XMM0,XMM4
  MOVQ [RDX].QWORD,XMM0
  {$ENDIF}
  end;
  {$ENDIF}
  procedure BlendLine_SSE2(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends Count pixels from Src over Dst, two pixels per iteration plus one
  // single pixel when Count is odd. Count <= 0 is a no-op.
  // For every pixel, with a = source alpha:
  //   A' := source with its three low channels replaced by
  //         (channel * a + bias) shr 8 and its alpha word kept as a
  //   Dst := A' + Dst - ((a * Dst + bias) shr 8)        (all four channels)
  // The x86 variant handles the odd pixel first (the first pixel of the line);
  // the x64 variant handles it last (the last pixel of the line).
  {$IFDEF FPC}
  const
  // alpha bytes of two adjacent pixels
  COpaque: QWORD = $FF000000FF000000;
  {$ENDIF}
  asm
  {$IFDEF TARGET_X86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count
  TEST ECX,ECX
  JLE @3
  PUSH EBX
  PXOR XMM4,XMM4
  MOV EBX,[bias_ptr]
  // XMM5 <- bias words (aligned 16-byte load)
  MOVDQA XMM5,[EBX]
  POP EBX
  // odd count => blend the first pixel on its own
  TEST ECX, 1
  JZ @2
  MOVD XMM0,[EAX]
  MOVD XMM2,[EDX]
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM2,XMM4
  // XMM1 <- source alpha in the four low words
  PSHUFLW XMM1,XMM0,$FF
  // premultiply source pixel by its alpha
  MOVQ XMM3,XMM1
  // XMM3 <- a, a, a, 0: the alpha word itself is multiplied by zero
  PSRLQ XMM3,16
  PMULLW XMM0,XMM3
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  // XMM3 <- 0, 0, 0, a: put the alpha back into the alpha word
  PSLLQ XMM3,48
  POR XMM0,XMM3
  // C' = A' + B' - aB'
  PMULLW XMM1,XMM2
  PADDW XMM1,XMM5
  PSRLW XMM1,8
  PADDW XMM0,XMM2
  PSUBW XMM0,XMM1
  PACKUSWB XMM0,XMM4
  MOVD [EDX], XMM0
  @2:
  // point EAX / EDX past the end of the lines and index backwards with a
  // negative pair counter
  LEA EAX, [EAX + ECX * 4]
  LEA EDX, [EDX + ECX * 4]
  SHR ECX,1
  JZ @3
  NEG ECX
  @1:
  MOVQ XMM0,[EAX + ECX * 8].QWORD
  MOVQ XMM2,[EDX + ECX * 8].QWORD
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM2,XMM4
  // XMM1 <- alpha of each of the two source pixels, broadcast per pixel
  PSHUFLW XMM1,XMM0,$FF
  PSHUFHW XMM1,XMM1,$FF
  // premultiply source pixel by its alpha
  MOVDQA XMM3,XMM1
  PSRLQ XMM3,16
  PMULLW XMM0,XMM3
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  PSLLQ XMM3,48
  POR XMM0,XMM3
  // C' = A' + B' - aB'
  PMULLW XMM1,XMM2
  PADDW XMM1,XMM5
  PSRLW XMM1,8
  PADDW XMM0,XMM2
  PSUBW XMM0,XMM1
  PACKUSWB XMM0,XMM4
  MOVQ [EDX + ECX * 8].QWORD,XMM0
  ADD ECX,1
  JS @1
  @3:
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // RCX <- Src
  // RDX <- Dst
  // R8D <- Count
  TEST R8D,R8D
  JLE @3
  PXOR XMM4,XMM4
  {$IFNDEF FPC}
  MOV RAX,bias_ptr
  {$ELSE}
  MOV RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
  // XMM5 <- bias words (aligned 16-byte load)
  MOVDQA XMM5,[RAX]
  // R9D <- number of pixel pairs
  MOV R9D, R8D
  SHR R9D, 1
  TEST R9D, R9D
  JZ @2
  @1:
  MOVQ XMM0,[RCX].QWORD
  MOVQ RAX,XMM0
  {$IFDEF FPC}
  // FPC only: both source pixels fully transparent => skip the pair,
  // both fully opaque => store the source pair unchanged
  AND RAX,[RIP+COpaque]
  JZ @1b
  CMP RAX,[RIP+COpaque]
  JZ @1a
  {$ENDIF}
  MOVQ XMM2,[RDX].QWORD
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM2,XMM4
  // XMM1 <- alpha of each of the two source pixels, broadcast per pixel
  PSHUFLW XMM1,XMM0,$FF
  PSHUFHW XMM1,XMM1,$FF
  // premultiply source pixel by its alpha
  MOVDQA XMM3,XMM1
  PSRLQ XMM3,16
  PMULLW XMM0,XMM3
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  PSLLQ XMM3,48
  POR XMM0,XMM3
  // C' = A' + B' - aB'
  PMULLW XMM1,XMM2
  PADDW XMM1,XMM5
  PSRLW XMM1,8
  PADDW XMM0,XMM2
  PSUBW XMM0,XMM1
  PACKUSWB XMM0,XMM4
  @1a: MOVQ [RDX].QWORD,XMM0
  @1b: ADD RCX,8
  ADD RDX,8
  SUB R9D,1
  JNZ @1
  @2:
  // odd count => blend the remaining last pixel on its own
  AND R8D, 1
  JZ @3
  MOVD XMM0,[RCX]
  MOVD XMM2,[RDX]
  PUNPCKLBW XMM0,XMM4
  PUNPCKLBW XMM2,XMM4
  // XMM1 <- source alpha in the four low words
  PSHUFLW XMM1,XMM0,$FF
  // premultiply source pixel by its alpha
  MOVQ XMM3,XMM1
  PSRLQ XMM3,16
  PMULLW XMM0,XMM3
  PADDW XMM0,XMM5
  PSRLW XMM0,8
  PSLLQ XMM3,48
  POR XMM0,XMM3
  // C' = A' + B' - aB'
  PMULLW XMM1,XMM2
  PADDW XMM1,XMM5
  PSRLW XMM1,8
  PADDW XMM0,XMM2
  PSUBW XMM0,XMM1
  PACKUSWB XMM0,XMM4
  MOVD [RDX], XMM0
  @3:
  {$ENDIF}
  end;
  procedure BlendLineEx_SSE2(Src, Dst: PColor32; Count: Integer; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends Count pixels from Src over Dst, scaling every source alpha by the
  // master alpha M. Per pixel the effective weight is
  //   Wt := ((Fa + 1) * M) shr 8
  // and Dst becomes (T[Wt] * (Src - Dst) + Dst * 256 + bias) shr 8 per channel,
  // where T[Wt] is the 16-byte table entry at alpha_ptr + Wt * 16.
  // Pixels whose source alpha or effective weight is 0 are skipped.
  // Count <= 0 is a no-op: the loop below counts down with DEC/JNZ, so it must
  // never be entered with a zero or negative counter.
  asm
  {$IFDEF TARGET_X86}
    // EAX <- Src
    // EDX <- Dst
    // ECX <- Count
    // M is passed on the stack

    // test the counter for zero or negativity
          TEST      ECX,ECX
          JLE       @4

          PUSH      ESI
          PUSH      EDI
          PUSH      EBX

          MOV       ESI,EAX         // ESI <- Src
          MOV       EDI,EDX         // EDI <- Dst
          MOV       EDX,M           // EDX <- Master Alpha

    // loop start
  @1:     MOV       EAX,[ESI]
          TEST      EAX,$FF000000
          JZ        @3              // complete transparency, proceed to next point
          MOV       EBX,EAX
          SHR       EBX,24
          INC       EBX             // 255:256 range bias
          IMUL      EBX,EDX
          SHR       EBX,8
          JZ        @3              // complete transparency, proceed to next point

    // blend
          PXOR      XMM0,XMM0
          MOVD      XMM1,EAX
          SHL       EBX,4           // weight * 16 = byte offset of the table entry
          MOVD      XMM2,[EDI]
          PUNPCKLBW XMM1,XMM0
          PUNPCKLBW XMM2,XMM0
          ADD       EBX,alpha_ptr
          PSUBW     XMM1,XMM2
          PMULLW    XMM1,[EBX]
          PSLLW     XMM2,8
          MOV       EBX,bias_ptr
          PADDW     XMM2,[EBX]
          PADDW     XMM1,XMM2
          PSRLW     XMM1,8
          PACKUSWB  XMM1,XMM0
          MOVD      EAX,XMM1

  @2:     MOV       [EDI],EAX

  @3:     ADD       ESI,4
          ADD       EDI,4

    // loop end
          DEC       ECX
          JNZ       @1

          POP       EBX
          POP       EDI
          POP       ESI
  @4:
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // RCX <- Src
    // RDX <- Dst
    // R8D <- Count
    // R9D <- M

    // test the counter for zero or negativity
          TEST      R8D,R8D
          JLE       @4
          TEST      R9D,R9D
          JZ        @4              // master alpha is zero => nothing to do

          MOV       R10,RCX         // R10 <- Src

    // loop start
  @1:     MOV       ECX,[R10]
          TEST      ECX,$FF000000
          JZ        @3              // complete transparency, proceed to next point
          MOV       EAX,ECX
          SHR       EAX,24
          INC       EAX             // 255:256 range bias
          IMUL      EAX,R9D
          SHR       EAX,8
          JZ        @3              // complete transparency, proceed to next point

    // blend
          PXOR      XMM0,XMM0
          MOVD      XMM1,ECX
          SHL       EAX,4           // weight * 16 = byte offset of the table entry
          MOVD      XMM2,[RDX]
          PUNPCKLBW XMM1,XMM0
          PUNPCKLBW XMM2,XMM0
  {$IFNDEF FPC}
          ADD       RAX,alpha_ptr
  {$ELSE}
          ADD       RAX,[RIP+alpha_ptr]
  {$ENDIF}
          PSUBW     XMM1,XMM2
          PMULLW    XMM1,[RAX]
          PSLLW     XMM2,8
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PADDW     XMM2,[RAX]
          PADDW     XMM1,XMM2
          PSRLW     XMM1,8
          PACKUSWB  XMM1,XMM0
          MOVD      ECX,XMM1

  @2:     MOV       [RDX],ECX

  @3:     ADD       R10,4
          ADD       RDX,4

    // loop end
          DEC       R8D
          JNZ       @1
  @4:
  {$ENDIF}
  end;
  function CombineReg_SSE2(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the combination of colors X and Y with weight W [0..255]:
  //   Result := (T[W] * (X - Y) + Y * 256 + bias) shr 8   (per 8-bit channel)
  // where T[W] is the 16-byte table entry at alpha_ptr + W * 16 and the bias
  // words are read through bias_ptr.
  asm
  {$IFDEF TARGET_X86}
  // EAX - Color X
  // EDX - Color Y
  // ECX - Weight of X [0..255]
  // Result := W * (X - Y) + Y
  MOVD XMM1,EAX
  PXOR XMM0,XMM0
  // W * 16 = byte offset of the table entry
  SHL ECX,4
  MOVD XMM2,EDX
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  ADD ECX,alpha_ptr
  PSUBW XMM1,XMM2
  PMULLW XMM1,[ECX]
  PSLLW XMM2,8
  MOV ECX,bias_ptr
  PADDW XMM2,[ECX]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD EAX,XMM1
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX - Color X
  // EDX - Color Y
  // R8D - Weight of X [0..255]
  // Result := W * (X - Y) + Y
  MOVD XMM1,ECX
  PXOR XMM0,XMM0
  // W * 16 = byte offset of the table entry
  SHL R8D,4
  MOVD XMM2,EDX
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  {$IFNDEF FPC}
  ADD R8,alpha_ptr
  {$ELSE}
  ADD R8,[RIP+alpha_ptr]
  {$ENDIF}
  PSUBW XMM1,XMM2
  PMULLW XMM1,[R8]
  PSLLW XMM2,8
  {$IFNDEF FPC}
  MOV R8,bias_ptr
  {$ELSE}
  MOV R8,[RIP+bias_ptr]
  {$ENDIF}
  PADDW XMM2,[R8]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD EAX,XMM1
  {$ENDIF}
  end;
  procedure CombineMem_SSE2(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Combines color F into the pixel B in place with weight W [0..255]:
  //   B := (T[W] * (F - B) + B * 256 + bias) shr 8   (per 8-bit channel)
  // where T[W] is the 16-byte table entry at alpha_ptr + W * 16.
  // W = 0 leaves B untouched; W = $FF stores F unchanged.
  asm
  {$IFDEF TARGET_X86}
  // EAX - Color X
  // [EDX] - Color Y
  // ECX - Weight of X [0..255]
  // Result := W * (X - Y) + Y
  // W = 0 => B is left as is
  JCXZ @1
  CMP ECX,$FF
  // W = 255 => B := F
  JZ @2
  MOVD XMM1,EAX
  PXOR XMM0,XMM0
  // W * 16 = byte offset of the table entry
  SHL ECX,4
  MOVD XMM2,[EDX]
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  ADD ECX,alpha_ptr
  PSUBW XMM1,XMM2
  PMULLW XMM1,[ECX]
  PSLLW XMM2,8
  MOV ECX,bias_ptr
  PADDW XMM2,[ECX]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD [EDX],XMM1
  @1: RET
  @2: MOV [EDX],EAX
  {$ENDIF}
  {$IFDEF TARGET_X64}
  // ECX - Color X
  // [RDX] - Color Y
  // R8D - Weight of X [0..255]
  // Result := W * (X - Y) + Y
  TEST R8D,R8D // Set flags for R8D
  JZ @1 // W = 0 => B is left as is
  CMP R8D,$FF
  // W = 255 => B := F
  JZ @2
  MOVD XMM1,ECX
  PXOR XMM0,XMM0
  // W * 16 = byte offset of the table entry
  SHL R8D,4
  MOVD XMM2,[RDX]
  PUNPCKLBW XMM1,XMM0
  PUNPCKLBW XMM2,XMM0
  {$IFNDEF FPC}
  ADD R8,alpha_ptr
  {$ELSE}
  ADD R8,[RIP+alpha_ptr]
  {$ENDIF}
  PSUBW XMM1,XMM2
  PMULLW XMM1,[R8]
  PSLLW XMM2,8
  {$IFNDEF FPC}
  MOV RAX,bias_ptr
  {$ELSE}
  MOV RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
  PADDW XMM2,[RAX]
  PADDW XMM1,XMM2
  PSRLW XMM1,8
  PACKUSWB XMM1,XMM0
  MOVD [RDX],XMM1
  @1: RET
  @2: MOV [RDX],ECX
  {$ENDIF}
  end;
  procedure CombineLine_SSE2(Src, Dst: PColor32; Count: Integer; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Combines Count pixels from Src into Dst with a constant weight W [0..255]:
  //   Dst[i] := (T[W] * (Src[i] - Dst[i]) + Dst[i] * 256 + bias) shr 8
  // per 8-bit channel, where T[W] is the table entry at alpha_ptr + W * 16.
  // Count <= 0 is a no-op, W = 0 leaves Dst untouched and W = $FF copies
  // Count * 4 bytes from Src to Dst with Move.
  asm
  {$IFDEF TARGET_X86}
    // EAX <- Src
    // EDX <- Dst
    // ECX <- Count
    // W is passed on the stack
    //
    // Every path leaves through the end of the routine, so the
    // compiler-generated exit code is the only place where the stack is
    // unwound and the stack parameter W is released.
          TEST      ECX,ECX
          JLE       @Done           // Count <= 0 => nothing to do

          PUSH      EBX
          MOV       EBX,W
          TEST      EBX,EBX
          JZ        @Restore        // weight is zero => Dst stays as is
          CMP       EBX,$FF
          JZ        @Copy           // weight = 255 => copy src to dst

          SHL       EBX,4           // W * 16 = byte offset of the table entry
          ADD       EBX,alpha_ptr
          MOVQ      XMM3,[EBX]      // XMM3 <- multiplier words for W
          MOV       EBX,bias_ptr
          MOVQ      XMM4,[EBX]      // XMM4 <- bias words
          PXOR      XMM0,XMM0

  @Loop:  MOVD      XMM1,[EAX]
          MOVD      XMM2,[EDX]
          PUNPCKLBW XMM1,XMM0       // widen Src channels to words
          PUNPCKLBW XMM2,XMM0       // widen Dst channels to words
          PSUBW     XMM1,XMM2       // Src - Dst
          PMULLW    XMM1,XMM3
          PSLLW     XMM2,8          // Dst * 256
          PADDW     XMM2,XMM4
          PADDW     XMM1,XMM2
          PSRLW     XMM1,8
          PACKUSWB  XMM1,XMM0
          MOVD      [EDX],XMM1
          ADD       EAX,4
          ADD       EDX,4
          DEC       ECX
          JNZ       @Loop
          JMP       @Restore

    // EAX = Src and EDX = Dst are still in place; ECX becomes the byte count
  @Copy:  SHL       ECX,2
          CALL      Move
  @Restore:
          POP       EBX
  @Done:
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // RCX <- Src
    // RDX <- Dst
    // R8D <- Count
    // R9D <- W
          TEST      R8D,R8D
          JLE       @2              // Count <= 0 => nothing to do
          TEST      R9D,R9D
          JZ        @2              // weight is zero => Dst stays as is
          CMP       R9D,$FF
          JZ        @3              // weight = 255 => copy src to dst

          SHL       R9D,4           // W * 16 = byte offset of the table entry
  {$IFNDEF FPC}
          ADD       R9,alpha_ptr
  {$ELSE}
          ADD       R9,[RIP+alpha_ptr]
  {$ENDIF}
          MOVQ      XMM3,[R9]       // XMM3 <- multiplier words for W
  {$IFNDEF FPC}
          MOV       R9,bias_ptr
  {$ELSE}
          MOV       R9,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          MOVQ      XMM4,[R9]       // XMM4 <- bias words
          PXOR      XMM0,XMM0

  @1:     MOVD      XMM1,[RCX]
          MOVD      XMM2,[RDX]
          PUNPCKLBW XMM1,XMM0
          PUNPCKLBW XMM2,XMM0
          PSUBW     XMM1,XMM2
          PMULLW    XMM1,XMM3
          PSLLW     XMM2,8
          PADDW     XMM2,XMM4
          PADDW     XMM1,XMM2
          PSRLW     XMM1,8
          PACKUSWB  XMM1,XMM0
          MOVD      [RDX],XMM1
          ADD       RCX,4
          ADD       RDX,4
          DEC       R8D
          JNZ       @1

  @2:     RET

    // RCX = Src and RDX = Dst are still in place; R8 becomes the byte count.
    // Move is entered with a tail jump: this routine sets up neither shadow
    // space nor stack alignment of its own, so Move has to run on the stack
    // layout prepared by our caller and return straight to it.
  @3:     SHL       R8D,2
          JMP       Move
  {$ENDIF}
  end;
  function MergeReg_SSE2(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    { Merges foreground colour F over background colour B and returns the
      resulting colour, alpha included.

      This is an implementation of the merge formula, as described
      in a paper by Bruce Wallace in 1981. Merging is associative,
      that is, A over (B over C) = (A over B) over C. The formula is,

        Ra = Fa + Ba - Fa * Ba
        Rc = (Fa (Fc - Bc * Ba) + Bc * Ba) / Ra

      where

        Rc is the resultant color,
        Ra is the resultant alpha,
        Fc is the foreground color,
        Fa is the foreground alpha,
        Bc is the background color,
        Ba is the background alpha.

      Implementation:

        Ra := 1 - (1 - Fa) * (1 - Ba);
        Wa := Fa / Ra;
        Rc := Bc + Wa * (Fc - Bc);

        (1 - Fa) * (1 - Ba) = 1 - Fa - Ba + Fa * Ba = (1 - Ra)

      Registers:
        x86: F in EAX, B in EDX, result in EAX.
        x64: F in ECX, B in EDX, result in EAX.

      Shortcuts: a fully transparent foreground returns B unchanged; a fully
      opaque foreground or a fully transparent background returns F unchanged.

      The x64 path uses XMM2 as its zero register. XMM6..XMM15 are
      callee-saved in the Microsoft x64 calling convention and this routine
      has no frame in which to save and restore them (it leaves through a
      bare RET), so only volatile XMM registers may be touched there.
    }
  {$IFDEF TARGET_X86}
    TEST      EAX,$FF000000  // foreground completely transparent =>
    JZ        @1             // result = background
    CMP       EAX,$FF000000  // foreground completely opaque =>
    JNC       @2             // result = foreground
    TEST      EDX,$FF000000  // background completely transparent =>
    JZ        @2             // result = foreground

    PXOR      XMM7,XMM7      // XMM7 <- 00
    MOVD      XMM0,EAX       // XMM0 <- Fa Fr Fg Fb
    SHR       EAX,24         // EAX <- Fa
    ROR       EDX,24
    MOVZX     ECX,DL         // ECX <- Ba
    PUNPCKLBW XMM0,XMM7      // XMM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
    SUB       EAX,$FF        // EAX <- (Fa - 1)
    XOR       ECX,$FF        // ECX <- (1 - Ba)
    IMUL      ECX,EAX        // ECX <- (Fa - 1) * (1 - Ba) = Ra - 1
    IMUL      ECX,$8081      // ECX <- Xa 00 00 00
    ADD       ECX,$8081*$FF*$FF
    SHR       ECX,15         // ECX <- Ra (Ra ends up in CH)
    MOV       DL,CH          // EDX <- Br Bg Bb Ra
    ROR       EDX,8          // EDX <- Ra Br Bg Bb
    MOVD      XMM1,EDX       // XMM1 <- Ra Br Bg Bb
    PUNPCKLBW XMM1,XMM7      // XMM1 <- 00 Ra 00 Br 00 Bg 00 Bb
    SHL       EAX,20         // EAX <- Fa 00 00
    PSUBW     XMM0,XMM1      // XMM0 <- ** Da ** Dr ** Dg ** Db
    ADD       EAX,$0FF01000
    PSLLW     XMM0,4
    XOR       EDX,EDX        // EDX <- 00
    DIV       ECX            // EAX <- Fa / Ra = Wa
    MOVD      XMM4,EAX       // XMM4 <- Wa
    PSHUFLW   XMM4,XMM4,$C0  // XMM4 <- 00 00 ** Wa ** Wa ** Wa
    PMULHW    XMM0,XMM4      // XMM0 <- 00 00 ** Pr ** Pg ** Pb
    PADDW     XMM0,XMM1      // XMM0 <- 00 Ra 00 Rr 00 Rg 00 Rb
    PACKUSWB  XMM0,XMM7      // XMM0 <- Ra Rr Rg Rb
    MOVD      EAX,XMM0
    RET
  @1: MOV     EAX,EDX        // result = background
  @2:                        // result = foreground (already in EAX)
  {$ENDIF}
  {$IFDEF TARGET_X64}
    TEST      ECX,$FF000000  // foreground completely transparent =>
    JZ        @1             // result = background
    MOV       EAX,ECX        // EAX <- Fa Fr Fg Fb
    CMP       EAX,$FF000000  // foreground completely opaque =>
    JNC       @2             // result = foreground
    TEST      EDX,$FF000000  // background completely transparent =>
    JZ        @2             // result = foreground

    PXOR      XMM2,XMM2      // XMM2 <- 00 (volatile register, see header)
    MOVD      XMM0,EAX       // XMM0 <- Fa Fr Fg Fb
    SHR       EAX,24         // EAX <- Fa
    ROR       EDX,24
    MOVZX     ECX,DL         // ECX <- Ba
    PUNPCKLBW XMM0,XMM2      // XMM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
    SUB       EAX,$FF        // EAX <- (Fa - 1)
    XOR       ECX,$FF        // ECX <- (1 - Ba)
    IMUL      ECX,EAX        // ECX <- (Fa - 1) * (1 - Ba) = Ra - 1
    IMUL      ECX,$8081      // ECX <- Xa 00 00 00
    ADD       ECX,$8081*$FF*$FF
    SHR       ECX,15         // ECX <- Ra (Ra ends up in CH)
    MOV       DL,CH          // EDX <- Br Bg Bb Ra
    ROR       EDX,8          // EDX <- Ra Br Bg Bb
    MOVD      XMM1,EDX       // XMM1 <- Ra Br Bg Bb
    PUNPCKLBW XMM1,XMM2      // XMM1 <- 00 Ra 00 Br 00 Bg 00 Bb
    SHL       EAX,20         // EAX <- Fa 00 00
    PSUBW     XMM0,XMM1      // XMM0 <- ** Da ** Dr ** Dg ** Db
    ADD       EAX,$0FF01000
    PSLLW     XMM0,4
    XOR       EDX,EDX        // EDX <- 00
    DIV       ECX            // EAX <- Fa / Ra = Wa
    MOVD      XMM4,EAX       // XMM4 <- Wa
    PSHUFLW   XMM4,XMM4,$C0  // XMM4 <- 00 00 ** Wa ** Wa ** Wa
    PMULHW    XMM0,XMM4      // XMM0 <- 00 00 ** Pr ** Pg ** Pb
    PADDW     XMM0,XMM1      // XMM0 <- 00 Ra 00 Rr 00 Rg 00 Rb
    PACKUSWB  XMM0,XMM2      // XMM0 <- Ra Rr Rg Rb
    MOVD      EAX,XMM0
    RET
  @1: MOV     EAX,EDX        // result = background
  @2:                        // result = foreground (already in EAX)
  {$ENDIF}
  end;
  // SSE2 implementation of the EMMS binding: the asm body is intentionally
  // empty, so calling it does nothing.
  // NOTE(review): presumably a no-op because the SSE2 routines use XMM
  // registers only and leave no MMX state to clear - confirm.
  3506. procedure EMMS_SSE2; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3507. asm
  3508. end;
  // Lightens (Amount >= 0) or darkens (Amount < 0) the three low bytes of C
  // by |Amount| using unsigned saturating byte arithmetic, and returns the
  // adjusted colour.
  // Multiplying the amount by $010101 copies it into bytes 0..2 of the
  // operand; for |Amount| <= 255 the operand's top byte is zero, so the top
  // byte of C passes through unchanged.
  // x86: C in EAX, Amount in EDX. x64: C in ECX, Amount in EDX. Result in EAX.
  3509. function LightenReg_SSE2(C: TColor32; Amount: Integer): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3510. asm
  3511. {$IFDEF TARGET_X86}
  3512. MOVD XMM0,EAX
  3513. TEST EDX,EDX
  // negative amount => darken
  3514. JL @1
  3515. IMUL EDX,$010101
  3516. MOVD XMM1,EDX
  // saturating add: each byte clamps at $FF
  3517. PADDUSB XMM0,XMM1
  3518. MOVD EAX,XMM0
  3519. RET
  3520. @1: NEG EDX
  3521. IMUL EDX,$010101
  3522. MOVD XMM1,EDX
  // saturating subtract: each byte clamps at $00
  3523. PSUBUSB XMM0,XMM1
  3524. MOVD EAX,XMM0
  3525. {$ENDIF}
  3526. {$IFDEF TARGET_X64}
  3527. MOVD XMM0,ECX
  3528. TEST EDX,EDX
  // negative amount => darken
  3529. JL @1
  3530. IMUL EDX,$010101
  3531. MOVD XMM1,EDX
  // saturating add: each byte clamps at $FF
  3532. PADDUSB XMM0,XMM1
  3533. MOVD EAX,XMM0
  3534. RET
  3535. @1: NEG EDX
  3536. IMUL EDX,$010101
  3537. MOVD XMM1,EDX
  // saturating subtract: each byte clamps at $00
  3538. PSUBUSB XMM0,XMM1
  3539. MOVD EAX,XMM0
  3540. {$ENDIF}
  3541. end;
  3542. { SSE2 Color algebra}
  // Returns the per-byte unsigned saturating sum of C1 and C2: each of the
  // four bytes is added independently and clamps at $FF (PADDUSB).
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3543. function ColorAdd_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3544. asm
  3545. {$IFDEF TARGET_X86}
  3546. MOVD XMM0,EAX
  3547. MOVD XMM1,EDX
  3548. PADDUSB XMM0,XMM1
  3549. MOVD EAX,XMM0
  3550. {$ENDIF}
  3551. {$IFDEF TARGET_X64}
  3552. MOVD XMM0,ECX
  3553. MOVD XMM1,EDX
  3554. PADDUSB XMM0,XMM1
  3555. MOVD EAX,XMM0
  3556. {$ENDIF}
  3557. end;
  // Returns the per-byte unsigned saturating difference C1 - C2: each of the
  // four bytes is subtracted independently and clamps at $00 (PSUBUSB).
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3558. function ColorSub_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3559. asm
  3560. {$IFDEF TARGET_X86}
  3561. MOVD XMM0,EAX
  3562. MOVD XMM1,EDX
  3563. PSUBUSB XMM0,XMM1
  3564. MOVD EAX,XMM0
  3565. {$ENDIF}
  3566. {$IFDEF TARGET_X64}
  3567. MOVD XMM0,ECX
  3568. MOVD XMM1,EDX
  3569. PSUBUSB XMM0,XMM1
  3570. MOVD EAX,XMM0
  3571. {$ENDIF}
  3572. end;
  // Returns the per-byte product of C1 and C2 scaled back to a byte:
  // each of the four bytes becomes (C1 byte * C2 byte) shr 8.
  // The bytes are widened to 16-bit words so the product fits, then packed
  // back to bytes.
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3573. function ColorModulate_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3574. asm
  3575. {$IFDEF TARGET_X86}
  // XMM2 <- 0, used both to widen bytes to words and as the pack filler
  3576. PXOR XMM2,XMM2
  3577. MOVD XMM0,EAX
  3578. PUNPCKLBW XMM0,XMM2
  3579. MOVD XMM1,EDX
  3580. PUNPCKLBW XMM1,XMM2
  // word-wise product, then divide by 256
  3581. PMULLW XMM0,XMM1
  3582. PSRLW XMM0,8
  3583. PACKUSWB XMM0,XMM2
  3584. MOVD EAX,XMM0
  3585. {$ENDIF}
  3586. {$IFDEF TARGET_X64}
  // XMM2 <- 0, used both to widen bytes to words and as the pack filler
  3587. PXOR XMM2,XMM2
  3588. MOVD XMM0,ECX
  3589. PUNPCKLBW XMM0,XMM2
  3590. MOVD XMM1,EDX
  3591. PUNPCKLBW XMM1,XMM2
  // word-wise product, then divide by 256
  3592. PMULLW XMM0,XMM1
  3593. PSRLW XMM0,8
  3594. PACKUSWB XMM0,XMM2
  3595. MOVD EAX,XMM0
  3596. {$ENDIF}
  3597. end;
  // Returns the per-byte unsigned maximum of C1 and C2 (PMAXUB) across all
  // four bytes.
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3598. function ColorMax_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3599. asm
  3600. {$IFDEF TARGET_X86}
  3601. MOVD XMM0,EAX
  3602. MOVD XMM1,EDX
  3603. PMAXUB XMM0,XMM1
  3604. MOVD EAX,XMM0
  3605. {$ENDIF}
  3606. {$IFDEF TARGET_X64}
  3607. MOVD XMM0,ECX
  3608. MOVD XMM1,EDX
  3609. PMAXUB XMM0,XMM1
  3610. MOVD EAX,XMM0
  3611. {$ENDIF}
  3612. end;
  // Returns the per-byte unsigned minimum of C1 and C2 (PMINUB) across all
  // four bytes.
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3613. function ColorMin_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3614. asm
  3615. {$IFDEF TARGET_X86}
  3616. MOVD XMM0,EAX
  3617. MOVD XMM1,EDX
  3618. PMINUB XMM0,XMM1
  3619. MOVD EAX,XMM0
  3620. {$ENDIF}
  3621. {$IFDEF TARGET_X64}
  3622. MOVD XMM0,ECX
  3623. MOVD XMM1,EDX
  3624. PMINUB XMM0,XMM1
  3625. MOVD EAX,XMM0
  3626. {$ENDIF}
  3627. end;
  // Returns the per-byte absolute difference |C1 - C2| across all four bytes.
  // Both saturating differences (C1 - C2 and C2 - C1) are computed; for each
  // byte at most one of them is non-zero, so OR-ing them gives the absolute
  // value.
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3628. function ColorDifference_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3629. asm
  3630. {$IFDEF TARGET_X86}
  3631. MOVD XMM0,EAX
  3632. MOVD XMM1,EDX
  // keep a copy of C1 for the reverse subtraction
  3633. MOVQ XMM2,XMM0
  3634. PSUBUSB XMM0,XMM1
  3635. PSUBUSB XMM1,XMM2
  3636. POR XMM0,XMM1
  3637. MOVD EAX,XMM0
  3638. {$ENDIF}
  3639. {$IFDEF TARGET_X64}
  3640. MOVD XMM0,ECX
  3641. MOVD XMM1,EDX
  // keep a copy of C1 for the reverse subtraction
  3642. MOVQ XMM2,XMM0
  3643. PSUBUSB XMM0,XMM1
  3644. PSUBUSB XMM1,XMM2
  3645. POR XMM0,XMM1
  3646. MOVD EAX,XMM0
  3647. {$ENDIF}
  3648. end;
  // Returns, for each of the four bytes, C1 + C2 - ((C1 * C2) shr 7).
  // The arithmetic is done on 16-bit words; the subtraction saturates at 0
  // (PSUBUSW) and the final pack saturates at $FF (PACKUSWB).
  // x86: C1 in EAX, C2 in EDX. x64: C1 in ECX, C2 in EDX. Result in EAX.
  3649. function ColorExclusion_SSE2(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  3650. asm
  3651. {$IFDEF TARGET_X86}
  3652. PXOR XMM2,XMM2
  3653. MOVD XMM0,EAX
  3654. PUNPCKLBW XMM0,XMM2
  3655. MOVD XMM1,EDX
  3656. PUNPCKLBW XMM1,XMM2
  // XMM3 <- widened C1, kept for the product
  3657. MOVQ XMM3,XMM0
  // XMM0 <- C1 + C2
  3658. PADDW XMM0,XMM1
  // XMM1 <- (C1 * C2) shr 7
  3659. PMULLW XMM1,XMM3
  3660. PSRLW XMM1,7
  3661. PSUBUSW XMM0,XMM1
  3662. PACKUSWB XMM0,XMM2
  3663. MOVD EAX,XMM0
  3664. {$ENDIF}
  3665. {$IFDEF TARGET_X64}
  3666. PXOR XMM2,XMM2
  3667. MOVD XMM0,ECX
  3668. PUNPCKLBW XMM0,XMM2
  3669. MOVD XMM1,EDX
  3670. PUNPCKLBW XMM1,XMM2
  // XMM3 <- widened C1, kept for the product
  3671. MOVQ XMM3,XMM0
  // XMM0 <- C1 + C2
  3672. PADDW XMM0,XMM1
  // XMM1 <- (C1 * C2) shr 7
  3673. PMULLW XMM1,XMM3
  3674. PSRLW XMM1,7
  3675. PSUBUSW XMM0,XMM1
  3676. PACKUSWB XMM0,XMM2
  3677. MOVD EAX,XMM0
  3678. {$ENDIF}
  3679. end;
  function ColorScale_SSE2(C, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
    { Scales every byte of C by the weight W and returns the packed result.

      The four bytes of C are widened to 16-bit words, multiplied word-wise
      by the 16-byte table entry located at alpha_ptr + W * 16, shifted right
      by 8 and packed back to bytes.
      NOTE(review): the table entry presumably holds W replicated in each
      word, and W is expected to be in 0..255 - confirm against GenAlphaTable.

      Registers:
        x86: C in EAX, W in EDX, result in EAX.
        x64: C in ECX, W in EDX, result in EAX.

      On x64 the index is scaled with a 32-bit SHL on EDX. W is a 32-bit
      parameter, so the upper half of RDX is not guaranteed to be zero on
      entry; the 32-bit shift zero-extends into RDX before it is used as an
      address.
    }
  {$IFDEF TARGET_X86}
    PXOR      XMM2,XMM2
    SHL       EDX,4          // EDX <- W * 16 (byte offset of table entry)
    MOVD      XMM0,EAX
    PUNPCKLBW XMM0,XMM2      // widen the bytes of C to words
    ADD       EDX,alpha_ptr
    PMULLW    XMM0,[EDX]
    PSRLW     XMM0,8
    PACKUSWB  XMM0,XMM2
    MOVD      EAX,XMM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    PXOR      XMM2,XMM2
    SHL       EDX,4          // RDX <- W * 16, upper 32 bits cleared
    MOVD      XMM0,ECX
    PUNPCKLBW XMM0,XMM2      // widen the bytes of C to words
  {$IFNDEF FPC}
    ADD       RDX,alpha_ptr
  {$ELSE}
    ADD       RDX,[RIP+alpha_ptr]
  {$ENDIF}
    PMULLW    XMM0,[RDX]
    PSRLW     XMM0,8
    PACKUSWB  XMM0,XMM2
    MOVD      EAX,XMM0
  {$ENDIF}
  end;
  3709. {$ENDIF}
  3710. {$ENDIF}
  3711. { Misc stuff }
  // Convenience wrapper: forwards C and Amount to LightenReg and returns its
  // result unchanged. LightenReg is the function variable that
  // RegisterBindings registers under FID_LIGHTEN.
  3712. function Lighten(C: TColor32; Amount: Integer): TColor32;
  3713. begin
  3714. Result := LightenReg(C, Amount);
  3715. end;
  { Fills the two lookup tables used by the merge routines:

      DivTable[I, J] = Round(I * J / 255)
      RcTable[I, J]  = Round(J * 255 / I)

    Row I = 0 is set to zero in both tables, which also avoids the division
    by zero in the RcTable formula. }
  procedure MakeMergeTables;
  const
    OneByteth : Double = 1 / 255;
  var
    Row, Col: Integer;
  begin
    for Col := 0 to 255 do
    begin
      // Row 0 has no meaningful quotient; both tables hold 0 there.
      DivTable[0, Col] := 0;
      RcTable[0, Col] := 0;

      for Row := 1 to 255 do
      begin
        DivTable[Row, Col] := Round(Row * Col * OneByteth);
        RcTable[Row, Col] := Round(Col * 255 / Row);
      end;
    end;
  end;
  // Function identifiers for the blend binding registry. RegisterBindings
  // uses each FID_* value twice: once to register the bindable function
  // variable, and again for every implementation added for that function.
  3734. const
  3735. FID_EMMS = 0;
  3736. FID_MERGEREG = 1;
  3737. FID_MERGEMEM = 2;
  3738. FID_MERGELINE = 3;
  3739. FID_MERGEREGEX = 4;
  3740. FID_MERGEMEMEX = 5;
  3741. FID_MERGELINEEX = 6;
  3742. FID_COMBINEREG = 7;
  3743. FID_COMBINEMEM = 8;
  3744. FID_COMBINELINE = 9;
  3745. FID_BLENDREG = 10;
  3746. FID_BLENDMEM = 11;
  3747. FID_BLENDMEMS = 12;
  3748. FID_BLENDLINE = 13;
  3749. FID_BLENDREGEX = 14;
  3750. FID_BLENDMEMEX = 15;
  3751. FID_BLENDLINEEX = 16;
  3752. FID_COLORMAX = 17;
  3753. FID_COLORMIN = 18;
  3754. FID_COLORAVERAGE = 19;
  3755. FID_COLORADD = 20;
  3756. FID_COLORSUB = 21;
  3757. FID_COLORDIV = 22;
  3758. FID_COLORMODULATE = 23;
  3759. FID_COLORDIFFERENCE = 24;
  3760. FID_COLOREXCLUSION = 25;
  3761. FID_COLORSCALE = 26;
  3762. FID_LIGHTEN = 27;
  3763. FID_BLENDREGRGB = 28;
  3764. FID_BLENDMEMRGB = 29;
  // Only defined in builds that enable the experimental 128-bit RGB blend.
  3765. {$IFDEF TEST_BLENDMEMRGB128SSE4}
  3766. FID_BLENDMEMRGB128 = 30;
  3767. {$ENDIF}
  { Creates the 'GR32_Blend bindings' registry and populates it:

      1. registers every bindable function variable under its FID_* id;
      2. adds the pure Pascal implementation of each function;
      3. unless PUREPASCAL is defined, adds the plain ASM, MMX/EMMX and SSE2
         implementations together with the CPU features each one requires;
      4. calls RebindAll.

    NOTE(review): RebindAll presumably points every registered variable at
    the best implementation the current CPU supports - confirm in
    GR32_Bindings. }
  procedure RegisterBindings;
  begin
    BlendRegistry := NewRegistry('GR32_Blend bindings');

    // Bindable function variables
  {$IFNDEF OMIT_MMX}
    BlendRegistry.RegisterBinding(FID_EMMS, @@EMMS);
  {$ENDIF}
    BlendRegistry.RegisterBinding(FID_MERGEREG, @@MergeReg);
    BlendRegistry.RegisterBinding(FID_MERGEMEM, @@MergeMem);
    BlendRegistry.RegisterBinding(FID_MERGELINE, @@MergeLine);
    BlendRegistry.RegisterBinding(FID_MERGEREGEX, @@MergeRegEx);
    BlendRegistry.RegisterBinding(FID_MERGEMEMEX, @@MergeMemEx);
    BlendRegistry.RegisterBinding(FID_MERGELINEEX, @@MergeLineEx);
    BlendRegistry.RegisterBinding(FID_COMBINEREG, @@CombineReg);
    BlendRegistry.RegisterBinding(FID_COMBINEMEM, @@CombineMem);
    BlendRegistry.RegisterBinding(FID_COMBINELINE, @@CombineLine);
    BlendRegistry.RegisterBinding(FID_BLENDREG, @@BlendReg);
    BlendRegistry.RegisterBinding(FID_BLENDMEM, @@BlendMem);
    BlendRegistry.RegisterBinding(FID_BLENDMEMS, @@BlendMems);
    BlendRegistry.RegisterBinding(FID_BLENDLINE, @@BlendLine);
    BlendRegistry.RegisterBinding(FID_BLENDREGEX, @@BlendRegEx);
    BlendRegistry.RegisterBinding(FID_BLENDMEMEX, @@BlendMemEx);
    BlendRegistry.RegisterBinding(FID_BLENDLINEEX, @@BlendLineEx);
    BlendRegistry.RegisterBinding(FID_COLORMAX, @@ColorMax);
    BlendRegistry.RegisterBinding(FID_COLORMIN, @@ColorMin);
    BlendRegistry.RegisterBinding(FID_COLORAVERAGE, @@ColorAverage);
    BlendRegistry.RegisterBinding(FID_COLORADD, @@ColorAdd);
    BlendRegistry.RegisterBinding(FID_COLORSUB, @@ColorSub);
    BlendRegistry.RegisterBinding(FID_COLORDIV, @@ColorDiv);
    BlendRegistry.RegisterBinding(FID_COLORMODULATE, @@ColorModulate);
    BlendRegistry.RegisterBinding(FID_COLORDIFFERENCE, @@ColorDifference);
    BlendRegistry.RegisterBinding(FID_COLOREXCLUSION, @@ColorExclusion);
    BlendRegistry.RegisterBinding(FID_COLORSCALE, @@ColorScale);
    BlendRegistry.RegisterBinding(FID_LIGHTEN, @@LightenReg);
    BlendRegistry.RegisterBinding(FID_BLENDREGRGB, @@BlendRegRGB);
    BlendRegistry.RegisterBinding(FID_BLENDMEMRGB, @@BlendMemRGB);
  {$IFDEF TEST_BLENDMEMRGB128SSE4}
    BlendRegistry.RegisterBinding(FID_BLENDMEMRGB128, @@BlendMemRGB128);
  {$ENDIF}

    // pure pascal
    BlendRegistry.Add(FID_EMMS, @EMMS_Pas);
    BlendRegistry.Add(FID_MERGEREG, @MergeReg_Pas);
    BlendRegistry.Add(FID_MERGEMEM, @MergeMem_Pas);
    BlendRegistry.Add(FID_MERGEMEMEX, @MergeMemEx_Pas);
    BlendRegistry.Add(FID_MERGEREGEX, @MergeRegEx_Pas);
    BlendRegistry.Add(FID_MERGELINE, @MergeLine_Pas);
    BlendRegistry.Add(FID_MERGELINEEX, @MergeLineEx_Pas);
    BlendRegistry.Add(FID_COLORDIV, @ColorDiv_Pas);
    BlendRegistry.Add(FID_COLORAVERAGE, @ColorAverage_Pas);
    BlendRegistry.Add(FID_COMBINEREG, @CombineReg_Pas);
    BlendRegistry.Add(FID_COMBINEMEM, @CombineMem_Pas);
    BlendRegistry.Add(FID_COMBINELINE, @CombineLine_Pas);
    BlendRegistry.Add(FID_BLENDREG, @BlendReg_Pas);
    BlendRegistry.Add(FID_BLENDMEM, @BlendMem_Pas);
    BlendRegistry.Add(FID_BLENDMEMS, @BlendMems_Pas);
    BlendRegistry.Add(FID_BLENDLINE, @BlendLine_Pas);
    BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_Pas);
    BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_Pas);
    BlendRegistry.Add(FID_BLENDLINEEX, @BlendLineEx_Pas);
    BlendRegistry.Add(FID_COLORMAX, @ColorMax_Pas);
    BlendRegistry.Add(FID_COLORMIN, @ColorMin_Pas);
    BlendRegistry.Add(FID_COLORADD, @ColorAdd_Pas);
    BlendRegistry.Add(FID_COLORSUB, @ColorSub_Pas);
    BlendRegistry.Add(FID_COLORMODULATE, @ColorModulate_Pas);
    BlendRegistry.Add(FID_COLORDIFFERENCE, @ColorDifference_Pas);
    BlendRegistry.Add(FID_COLOREXCLUSION, @ColorExclusion_Pas);
    BlendRegistry.Add(FID_COLORSCALE, @ColorScale_Pas);
    BlendRegistry.Add(FID_LIGHTEN, @LightenReg_Pas);
    BlendRegistry.Add(FID_BLENDREGRGB, @BlendRegRGB_Pas);
    BlendRegistry.Add(FID_BLENDMEMRGB, @BlendMemRGB_Pas);

  {$IFNDEF PUREPASCAL}
    // plain assembler, no CPU feature requirements
    BlendRegistry.Add(FID_EMMS, @EMMS_ASM, []);
    BlendRegistry.Add(FID_COMBINEREG, @CombineReg_ASM, []);
    BlendRegistry.Add(FID_COMBINEMEM, @CombineMem_ASM, []);
    BlendRegistry.Add(FID_BLENDREG, @BlendReg_ASM, []);
    BlendRegistry.Add(FID_BLENDMEM, @BlendMem_ASM, []);
    BlendRegistry.Add(FID_BLENDMEMS, @BlendMems_ASM, []);
    BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_ASM, []);
    BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_ASM, []);
    BlendRegistry.Add(FID_BLENDLINE, @BlendLine_ASM, []);
    BlendRegistry.Add(FID_LIGHTEN, @LightenReg_Pas, []); // no ASM version available

  {$IFNDEF OMIT_MMX}
    // MMX / extended MMX
    BlendRegistry.Add(FID_EMMS, @EMMS_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COMBINEREG, @CombineReg_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COMBINEMEM, @CombineMem_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COMBINELINE, @CombineLine_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDREG, @BlendReg_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDMEM, @BlendMem_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDLINE, @BlendLine_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDLINEEX, @BlendLineEx_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COLORMAX, @ColorMax_EMMX, [ciEMMX]);
    BlendRegistry.Add(FID_COLORMIN, @ColorMin_EMMX, [ciEMMX]);
    BlendRegistry.Add(FID_COLORADD, @ColorAdd_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COLORSUB, @ColorSub_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COLORMODULATE, @ColorModulate_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COLORDIFFERENCE, @ColorDifference_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COLOREXCLUSION, @ColorExclusion_MMX, [ciMMX]);
    BlendRegistry.Add(FID_COLORSCALE, @ColorScale_MMX, [ciMMX]);
    BlendRegistry.Add(FID_LIGHTEN, @LightenReg_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDREGRGB, @BlendRegRGB_MMX, [ciMMX]);
    BlendRegistry.Add(FID_BLENDMEMRGB, @BlendMemRGB_MMX, [ciMMX]);
  {$ENDIF}

  {$IFNDEF OMIT_SSE2}
    // SSE2
    BlendRegistry.Add(FID_EMMS, @EMMS_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_MERGEREG, @MergeReg_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COMBINEREG, @CombineReg_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COMBINEMEM, @CombineMem_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COMBINELINE, @CombineLine_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDREG, @BlendReg_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDMEM, @BlendMem_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDMEMS, @BlendMems_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDLINE, @BlendLine_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDLINEEX, @BlendLineEx_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORMAX, @ColorMax_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORMIN, @ColorMin_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORADD, @ColorAdd_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORSUB, @ColorSub_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORMODULATE, @ColorModulate_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORDIFFERENCE, @ColorDifference_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLOREXCLUSION, @ColorExclusion_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_COLORSCALE, @ColorScale_SSE2, [ciSSE2]);
    // LightenReg_SSE2 operates on XMM registers (MOVD/PADDUSB/PSUBUSB), which
    // needs SSE2; requiring only SSE could bind it on a CPU that faults on it.
    BlendRegistry.Add(FID_LIGHTEN, @LightenReg_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDREGRGB, @BlendRegRGB_SSE2, [ciSSE2]);
    BlendRegistry.Add(FID_BLENDMEMRGB, @BlendMemRGB_SSE2, [ciSSE2]);
  {$IFDEF TEST_BLENDMEMRGB128SSE4}
    // NOTE(review): an _SSE4 routine registered with only ciSSE2 - confirm
    // the feature requirement against the routine's instruction set.
    BlendRegistry.Add(FID_BLENDMEMRGB128, @BlendMemRGB128_SSE4, [ciSSE2]);
  {$ENDIF}
  {$ENDIF}

  {$IFNDEF TARGET_x64}
    BlendRegistry.Add(FID_MERGEREG, @MergeReg_ASM, []);
  {$ENDIF}
  {$ENDIF}

    BlendRegistry.RebindAll;
  end;
  // Unit start-up: register and bind the blend functions, then build the
  // merge lookup tables.
  3905. initialization
  3906. RegisterBindings;
  3907. MakeMergeTables;
  3908. {$IFNDEF PUREPASCAL}
  // MMX_ACTIVE mirrors whether the CPU reports MMX support.
  3909. MMX_ACTIVE := (ciMMX in CPUFeatures);
  // The alpha table is generated only when the CPU reports MMX or SSE2.
  // NOTE(review): presumably this is the table reached through
  // alpha_ptr/bias_ptr by the MMX/SSE2 routines - confirm in GenAlphaTable.
  3910. if [ciMMX, ciSSE2] * CPUFeatures <> [] then
  3911. GenAlphaTable;
  3912. {$ELSE}
  3913. MMX_ACTIVE := False;
  3914. {$ENDIF}
  // Unit shutdown: release the alpha table under the same condition that
  // created it.
  3915. finalization
  3916. {$IFNDEF PUREPASCAL}
  3917. if [ciMMX, ciSSE2] * CPUFeatures <> [] then
  3918. FreeAlphaTable;
  3919. {$ENDIF}
  3920. end.