GR32_BlendMMX.pas 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000
  1. unit GR32_BlendMMX;
  2. (* ***** BEGIN LICENSE BLOCK *****
  3. * Version: MPL 1.1 or LGPL 2.1 with linking exception
  4. *
  5. * The contents of this file are subject to the Mozilla Public License Version
  6. * 1.1 (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. * http://www.mozilla.org/MPL/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * Alternatively, the contents of this file may be used under the terms of the
  16. * Free Pascal modified version of the GNU Lesser General Public License
  17. * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
  18. * of this license are applicable instead of those above.
  19. * Please see the file LICENSE.txt for additional information concerning this
  20. * license.
  21. *
  22. * The Original Code is Graphics32
  23. *
  24. * The Initial Developer of the Original Code is
  25. * Alex A. Denisov
  26. *
  27. * Portions created by the Initial Developer are Copyright (C) 2000-2009
  28. * the Initial Developer. All Rights Reserved.
  29. *
  30. * Contributor(s):
  31. * Christian-W. Budde
  32. * - 2019/04/01 - Refactoring
  33. *
  34. * ***** END LICENSE BLOCK ***** *)
  35. interface
  36. {$I GR32.inc}
  37. uses
  38. GR32;
  39. {$IFNDEF OMIT_MMX}
  40. function BlendReg_MMX(F, B: TColor32): TColor32;
  41. procedure BlendMem_MMX(F: TColor32; var B: TColor32);
  42. function BlendRegEx_MMX(F, B: TColor32; M: Cardinal): TColor32;
  43. procedure BlendMemEx_MMX(F: TColor32; var B:TColor32; M: Cardinal);
  44. function BlendRegRGB_MMX(F, B: TColor32; W: Cardinal): TColor32;
  45. procedure BlendMemRGB_MMX(F: TColor32; var B: TColor32; W: Cardinal);
  46. procedure BlendLine_MMX(Src, Dst: PColor32; Count: Integer);
  47. procedure BlendLineEx_MMX(Src, Dst: PColor32; Count: Integer; M: Cardinal);
  48. function CombineReg_MMX(X, Y: TColor32; W: Cardinal): TColor32;
  49. procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: Cardinal);
  50. procedure CombineLine_MMX(Src, Dst: PColor32; Count: Integer; W: Cardinal);
  51. procedure EMMS_MMX;
  52. function LightenReg_MMX(C: TColor32; Amount: Integer): TColor32;
  53. function ColorAdd_MMX(C1, C2: TColor32): TColor32;
  54. function ColorSub_MMX(C1, C2: TColor32): TColor32;
  55. function ColorModulate_MMX(C1, C2: TColor32): TColor32;
  56. function ColorMax_EMMX(C1, C2: TColor32): TColor32;
  57. function ColorMin_EMMX(C1, C2: TColor32): TColor32;
  58. function ColorDifference_MMX(C1, C2: TColor32): TColor32;
  59. function ColorExclusion_MMX(C1, C2: TColor32): TColor32;
  60. function ColorScale_MMX(C: TColor32; W: Cardinal): TColor32;
  61. {$ENDIF}
  62. implementation
  63. uses
  64. GR32_Blend,
  65. GR32_LowLevel,
  66. GR32_System;
  67. const
  68. BlendRegistryPriorityMMX = -512;
  69. { MMX versions }
  function BlendReg_MMX(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends foreground F over background B, weighted by F's own alpha byte.
  // Each 16-bit lane is computed as
  //   ((F - B) * Fa + (B shl 8) + bias) shr 8
  // where "bias" is the quadword that bias_ptr (not declared in this unit)
  // points to. The alpha byte of the returned colour is forced to $FF.
  // No EMMS is executed here.
  asm
  {$IFDEF TARGET_x86}
    // in: EAX = F, EDX = B      out: EAX
          PXOR      MM3,MM3         // MM3 <- 0 (zero source for unpack/pack)
          MOVD      MM0,EAX         // MM0 <- 00 00 00 00 Fa Fr Fg Fb
          MOVD      MM2,EDX         // MM2 <- 00 00 00 00 Ba Br Bg Bb
          PUNPCKLBW MM0,MM3         // MM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
          PUNPCKLBW MM2,MM3         // MM2 <- 00 Ba 00 Br 00 Bg 00 Bb
    // broadcast Fa into all four lanes of MM1
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1         // MM1 <- 00 Fa 00 Fa 00 Fr 00 Fr
          PUNPCKHDQ MM1,MM1         // MM1 <- 00 Fa 00 Fa 00 Fa 00 Fa
    // (F - B) * Fa
          PSUBW     MM0,MM2
          PMULLW    MM0,MM1
    // B * 256 + bias
          PSLLW     MM2,8
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]
    // sum, scale back to bytes
          PADDW     MM2,MM0
          PSRLW     MM2,8
          PACKUSWB  MM2,MM3
          MOVD      EAX,MM2
          OR        EAX,$FF000000   // EAX <- FF Zr Zg Zb
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // in: ECX = F, EDX = B      out: EAX
          PXOR      MM3,MM3         // MM3 <- 0 (zero source for unpack/pack)
          MOVD      MM0,ECX
          MOVD      MM2,EDX
          PUNPCKLBW MM0,MM3
          PUNPCKLBW MM2,MM3
    // broadcast Fa into all four lanes of MM1
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1
          PUNPCKHDQ MM1,MM1
    // (F - B) * Fa
          PSUBW     MM0,MM2
          PMULLW    MM0,MM1
    // B * 256 + bias
          PSLLW     MM2,8
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PADDW     MM2,[RAX]
    // sum, scale back to bytes
          PADDW     MM2,MM0
          PSRLW     MM2,8
          PACKUSWB  MM2,MM3
          MOVD      EAX,MM2
          OR        EAX,$FF000000   // EAX <- FF Zr Zg Zb
  {$ENDIF}
  end;
  125. {$IFDEF TARGET_x86}
  procedure BlendMem_MMX(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends foreground F into the background colour stored at B, weighted by
  // F's alpha byte (x86 only; the block is wrapped in TARGET_x86).
  //   alpha = 0    : B is left untouched
  //   alpha = $FF  : F is copied to B
  //   otherwise    : per lane ((F - B) * Fa + (B shl 8) + bias) shr 8, and the
  //                  stored alpha byte is forced to $FF
  // The alpha byte is forced in a register and written with one 32-bit store
  // (as BlendLine_MMX does) instead of OR-ing an immediate into a memory
  // operand that carried no explicit size.
  asm
    // in: EAX = F, EDX = address of B
          TEST      EAX,$FF000000
          JZ        @Done           // fully transparent, nothing to do
          CMP       EAX,$FF000000
          JNC       @Store          // fully opaque, plain copy

          PXOR      MM3,MM3
          MOVD      MM0,EAX         // MM0 <- 00 00 00 00 Fa Fr Fg Fb
          MOVD      MM2,[EDX]       // MM2 <- 00 00 00 00 Ba Br Bg Bb
          PUNPCKLBW MM0,MM3
          MOV       ECX,bias_ptr
          PUNPCKLBW MM2,MM3
          MOVQ      MM1,MM0
          PUNPCKHWD MM1,MM1
          PSUBW     MM0,MM2         // F - B
          PUNPCKHDQ MM1,MM1         // MM1 <- Fa in all four lanes
          PSLLW     MM2,8           // B * 256
          PMULLW    MM0,MM1         // (F - B) * Fa
          PADDW     MM2,[ECX]       // + bias
          PADDW     MM2,MM0
          PSRLW     MM2,8
          PACKUSWB  MM2,MM3
          MOVD      EAX,MM2
          OR        EAX,$FF000000   // force opaque result
  @Store:
          MOV       [EDX],EAX
  @Done:
  end;
  function BlendRegEx_MMX(F, B: TColor32; M: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends foreground F over background B using F's alpha scaled by the master
  // alpha M (x86 only; the block is wrapped in TARGET_x86).
  // The effective weight is ((M + 1) * Fa) shr 8. When it is zero, B is
  // returned unchanged; otherwise each lane, alpha included, becomes
  //   ((F - B) * mult + (B shl 8) + bias) shr 8
  // where mult is the quadword at alpha_ptr + weight * 16 and bias is the
  // quadword bias_ptr points to (both tables live outside this unit).
  asm
    // in: EAX = F, EDX = B, ECX = M      out: EAX
          PUSH      EBX
          MOV       EBX,EAX
          SHR       EBX,24          // EBX <- Fa
          INC       ECX             // 255:256 range bias
          IMUL      ECX,EBX
          SHR       ECX,8           // ECX <- effective weight, ZF set if zero
          JZ        @KeepBackground

          PXOR      MM0,MM0
          MOVD      MM1,EAX
          MOVD      MM2,EDX
          PUNPCKLBW MM1,MM0         // MM1 <- F as four words
          PUNPCKLBW MM2,MM0         // MM2 <- B as four words
          SHL       ECX,4           // table entries are 16 bytes apart
          ADD       ECX,alpha_ptr
          PSUBW     MM1,MM2         // F - B
          PMULLW    MM1,[ECX]       // * weight
          PSLLW     MM2,8           // B * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1
          JMP       @Leave

  @KeepBackground:
          MOV       EAX,EDX
  @Leave:
          POP       EBX
  end;
  192. {$ENDIF}
  procedure BlendMemEx_MMX(F: TColor32; var B:TColor32; M: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends foreground F into the background colour stored at B, using F's
  // alpha scaled by the master alpha M.
  // Nothing is written when F's alpha byte is zero or when the effective
  // weight ((M + 1) * Fa) shr 8 is zero. Otherwise each lane becomes
  //   ((F - B) * mult + (B shl 8) + bias) shr 8
  // with mult read from alpha_ptr + weight * 16 and bias from bias_ptr.
  asm
  {$IFDEF TARGET_x86}
    // in: EAX = F, EDX = address of B, ECX = M
          TEST      EAX,$FF000000
          JZ        @Done           // fully transparent foreground
          PUSH      EBX
          MOV       EBX,EAX
          SHR       EBX,24          // EBX <- Fa
          INC       ECX             // 255:256 range bias
          IMUL      ECX,EBX
          SHR       ECX,8           // ECX <- effective weight
          JZ        @Restore

          PXOR      MM0,MM0
          MOVD      MM1,EAX
          MOVD      MM2,[EDX]
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          SHL       ECX,4
          ADD       ECX,alpha_ptr
          PSUBW     MM1,MM2         // F - B
          PMULLW    MM1,[ECX]       // * weight
          PSLLW     MM2,8           // B * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDX],MM1
  @Restore:
          POP       EBX
  @Done:
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // in: ECX = F, RDX = address of B, R8D = M
          TEST      ECX,$FF000000
          JZ        @Done           // fully transparent foreground
          MOV       EAX,ECX
          SHR       EAX,24          // EAX <- Fa
          INC       R8D             // 255:256 range bias
          IMUL      R8D,EAX
          SHR       R8D,8           // R8D <- effective weight
          JZ        @Done

          PXOR      MM0,MM0
          MOVD      MM1,ECX
          MOVD      MM2,[RDX]
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          SHL       R8D,4
  {$IFNDEF FPC}
          ADD       R8,alpha_ptr
  {$ELSE}
          ADD       R8,[RIP+alpha_ptr]
  {$ENDIF}
          PSUBW     MM1,MM2         // F - B
          PMULLW    MM1,[R8]        // * weight
          PSLLW     MM2,8           // B * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PADDW     MM2,[RAX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [RDX],MM1
  @Done:
  {$ENDIF}
  end;
  function BlendRegRGB_MMX(F, B: TColor32; W: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends F over B with an individual weight per byte lane, taken from W.
  // W is byte-swapped before it is unpacked, so its most significant byte
  // weights the lowest colour byte and its least significant byte weights the
  // highest colour byte. Each lane is
  //   ((F - B) * Wlane + (B shl 8) + bias) shr 8
  // with bias read through bias_ptr.
  asm
  {$IFDEF TARGET_x86}
    // in: EAX = F, EDX = B, ECX = W      out: EAX
          PXOR      MM2,MM2
          MOVD      MM0,EAX
          MOVD      MM1,EDX
          BSWAP     ECX
          MOVD      MM3,ECX
          PUNPCKLBW MM0,MM2         // F as four words
          PUNPCKLBW MM1,MM2         // B as four words
          PUNPCKLBW MM3,MM2         // weights as four words
          PSUBW     MM0,MM1         // F - B
          PMULLW    MM0,MM3         // * per-lane weight
          PSLLW     MM1,8           // B * 256
          MOV       EAX,bias_ptr
          PADDW     MM1,[EAX]       // + bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      EAX,MM1
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // in: ECX = F, EDX = B, R8D = W      out: EAX
          PXOR      MM2,MM2
          MOVD      MM0,ECX
          MOVD      MM1,EDX
          BSWAP     R8D
          MOVD      MM3,R8D
          PUNPCKLBW MM0,MM2         // F as four words
          PUNPCKLBW MM1,MM2         // B as four words
          PUNPCKLBW MM3,MM2         // weights as four words
          PSUBW     MM0,MM1         // F - B
          PMULLW    MM0,MM3         // * per-lane weight
          PSLLW     MM1,8           // B * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PADDW     MM1,[RAX]       // + bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      EAX,MM1
  {$ENDIF}
  end;
  procedure BlendMemRGB_MMX(F: TColor32; var B: TColor32; W: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends F into the colour stored at B with an individual weight per byte
  // lane, taken from W. W is byte-swapped before it is unpacked, so its most
  // significant byte weights the lowest colour byte. Each lane is
  //   ((F - B) * Wlane + (B shl 8) + bias) shr 8
  // with bias read through bias_ptr.
  //
  // On x64 the address of B arrives in RDX. It is dereferenced as [RDX];
  // using [EDX] would truncate the pointer to its low 32 bits.
  asm
  {$IFDEF TARGET_x86}
    // in: EAX = F, EDX = address of B, ECX = W
          PXOR      MM2,MM2
          MOVD      MM0,EAX
          PUNPCKLBW MM0,MM2         // F as four words
          MOVD      MM1,[EDX]
          PUNPCKLBW MM1,MM2         // B as four words
          BSWAP     ECX
          PSUBW     MM0,MM1         // F - B
          MOVD      MM3,ECX
          PUNPCKLBW MM3,MM2         // weights as four words
          PMULLW    MM0,MM3         // * per-lane weight
          MOV       EAX,bias_ptr
          PSLLW     MM1,8           // B * 256
          PADDW     MM1,[EAX]       // + bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      [EDX],MM1
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // in: ECX = F, RDX = address of B, R8D = W
          PXOR      MM2,MM2
          MOVD      MM0,ECX
          PUNPCKLBW MM0,MM2         // F as four words
          MOVD      MM1,[RDX]       // full 64-bit pointer
          PUNPCKLBW MM1,MM2         // B as four words
          BSWAP     R8D
          PSUBW     MM0,MM1         // F - B
          MOVD      MM3,R8D
          PUNPCKLBW MM3,MM2         // weights as four words
          PMULLW    MM0,MM3         // * per-lane weight
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PSLLW     MM1,8           // B * 256
          PADDW     MM1,[RAX]       // + bias
          PADDW     MM1,MM0
          PSRLW     MM1,8
          PACKUSWB  MM1,MM2
          MOVD      [RDX],MM1       // full 64-bit pointer
  {$ENDIF}
  end;
  362. {$IFDEF TARGET_x86}
  procedure BlendLine_MMX(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Blends Count pixels from Src over the pixels at Dst, each weighted by the
  // source pixel's own alpha byte (x86 only; the block is wrapped in
  // TARGET_x86). Per pixel:
  //   alpha = 0    : destination left untouched
  //   alpha = $FF  : source copied
  //   otherwise    : per lane ((F - B) * Fa + (B shl 8) + bias) shr 8, and the
  //                  stored alpha byte is forced to $FF
  // Returns immediately when Count <= 0. A plain sign test would let
  // Count = 0 into the loop, where DEC/JNZ wraps the counter around.
  asm
    // in: EAX = Src, EDX = Dst, ECX = Count
          TEST      ECX,ECX
          JLE       @Exit           // zero or negative count

          PUSH      ESI
          PUSH      EDI
          MOV       ESI,EAX         // ESI <- Src
          MOV       EDI,EDX         // EDI <- Dst

  @Pixel:
          MOV       EAX,[ESI]
          TEST      EAX,$FF000000
          JZ        @Next           // complete transparency, proceed to next point
          CMP       EAX,$FF000000
          JNC       @Store          // opaque pixel, copy without blending

    // blend
          MOVD      MM0,EAX         // MM0 <- 00 00 00 00 Fa Fr Fg Fb
          PXOR      MM3,MM3         // MM3 <- 00 00 00 00 00 00 00 00
          MOVD      MM2,[EDI]       // MM2 <- 00 00 00 00 Ba Br Bg Bb
          PUNPCKLBW MM0,MM3         // MM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
          MOV       EAX,bias_ptr
          PUNPCKLBW MM2,MM3         // MM2 <- 00 Ba 00 Br 00 Bg 00 Bb
          MOVQ      MM1,MM0         // MM1 <- 00 Fa 00 Fr 00 Fg 00 Fb
          PUNPCKHWD MM1,MM1         // MM1 <- 00 Fa 00 Fa 00 ** 00 **
          PSUBW     MM0,MM2         // MM0 <- 00 Da 00 Dr 00 Dg 00 Db
          PUNPCKHDQ MM1,MM1         // MM1 <- 00 Fa 00 Fa 00 Fa 00 Fa
          PSLLW     MM2,8           // MM2 <- Ba 00 Br 00 Bg 00 Bb 00
          PMULLW    MM0,MM1         // MM0 <- Pa ** Pr ** Pg ** Pb **
          PADDW     MM2,[EAX]       // add bias
          PADDW     MM2,MM0         // MM2 <- Qa ** Qr ** Qg ** Qb **
          PSRLW     MM2,8           // MM2 <- 00 Qa 00 Qr 00 Qg 00 Qb
          PACKUSWB  MM2,MM3         // MM2 <- 00 00 00 00 Qa Qr Qg Qb
          MOVD      EAX,MM2         // EAX <- Qa Qr Qg Qb
          OR        EAX,$FF000000   // EAX <- FF Zr Zg Zb
  @Store:
          MOV       [EDI],EAX
  @Next:
          ADD       ESI,4
          ADD       EDI,4
          DEC       ECX
          JNZ       @Pixel

          POP       EDI
          POP       ESI
  @Exit:
  end;
  procedure BlendLineEx_MMX(Src, Dst: PColor32; Count: Integer; M: Cardinal); {$IFDEF FPC} assembler; {$ENDIF}
  // Blends Count pixels from Src over the pixels at Dst, each weighted by the
  // source pixel's alpha scaled by the master alpha M (x86 only; the block is
  // wrapped in TARGET_x86).
  // Per pixel the effective weight is ((Fa + 1) * M) shr 8. Pixels whose alpha
  // byte or effective weight is zero are skipped; all others become, per lane,
  //   ((F - B) * mult + (B shl 8) + bias) shr 8
  // with mult read from alpha_ptr + weight * 16 and bias from bias_ptr.
  //
  // Returns immediately when Count <= 0. A plain sign test would let
  // Count = 0 into the loop, where DEC/JNZ wraps the counter around.
  //
  // M is a stack parameter and is read after three PUSHes, so the routine
  // needs a frame pointer to address it. "nostackframe" is therefore not
  // requested for FPC here.
  // NOTE(review): assumes FPC builds a frame for an assembler routine with a
  // stack parameter when nostackframe is absent - confirm against FPC docs.
  asm
    // in: EAX = Src, EDX = Dst, ECX = Count, M on the stack
          TEST      ECX,ECX
          JLE       @Exit           // zero or negative count

          PUSH      ESI
          PUSH      EDI
          PUSH      EBX
          MOV       ESI,EAX         // ESI <- Src
          MOV       EDI,EDX         // EDI <- Dst
          MOV       EDX,M           // EDX <- Master Alpha

  @Pixel:
          MOV       EAX,[ESI]
          TEST      EAX,$FF000000
          JZ        @Next           // complete transparency, proceed to next point
          MOV       EBX,EAX
          SHR       EBX,24          // EBX <- Fa
          INC       EBX             // 255:256 range bias
          IMUL      EBX,EDX
          SHR       EBX,8           // EBX <- effective weight
          JZ        @Next           // complete transparency, proceed to next point

    // blend
          PXOR      MM0,MM0
          MOVD      MM1,EAX
          SHL       EBX,4           // table entries are 16 bytes apart
          MOVD      MM2,[EDI]
          PUNPCKLBW MM1,MM0
          PUNPCKLBW MM2,MM0
          ADD       EBX,alpha_ptr
          PSUBW     MM1,MM2         // F - B
          PMULLW    MM1,[EBX]       // * weight
          PSLLW     MM2,8           // B * 256
          MOV       EBX,bias_ptr
          PADDW     MM2,[EBX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDI],MM1
  @Next:
          ADD       ESI,4
          ADD       EDI,4
          DEC       ECX
          JNZ       @Pixel

          POP       EBX
          POP       EDI
          POP       ESI
  @Exit:
  end;
  462. {$ENDIF}
  function CombineReg_MMX(X, Y: TColor32; W: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Mixes colour X into colour Y with weight W (commented in the original as
  // 0..255) and returns the mix. Each lane, alpha included, is
  //   ((X - Y) * mult + (Y shl 8) + bias) shr 8
  // with mult read from alpha_ptr + W * 16 and bias read through bias_ptr.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = X, EDX = Y, ECX = W      out: EAX
          PXOR      MM0,MM0
          MOVD      MM1,EAX
          MOVD      MM2,EDX
          PUNPCKLBW MM1,MM0         // X as four words
          PUNPCKLBW MM2,MM0         // Y as four words
          SHL       ECX,4           // table entries are 16 bytes apart
          ADD       ECX,alpha_ptr
          PSUBW     MM1,MM2         // X - Y
          PMULLW    MM1,[ECX]       // * weight
          PSLLW     MM2,8           // Y * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = X, EDX = Y, R8D = W      out: EAX
          PXOR      MM0,MM0
          MOVD      MM1,ECX
          MOVD      MM2,EDX
          PUNPCKLBW MM1,MM0         // X as four words
          PUNPCKLBW MM2,MM0         // Y as four words
          SHL       R8D,4           // table entries are 16 bytes apart
  {$IFNDEF FPC}
          ADD       R8,alpha_ptr
  {$ELSE}
          ADD       R8,[RIP+alpha_ptr]
  {$ENDIF}
          PSUBW     MM1,MM2         // X - Y
          PMULLW    MM1,[R8]        // * weight
          PSLLW     MM2,8           // Y * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PADDW     MM2,[RAX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      EAX,MM1
  {$ENDIF}
  end;
  procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: Cardinal); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Mixes colour F into the colour stored at B with weight W (commented in the
  // original as 0..255).
  //   W = 0    : B is left untouched
  //   W = $FF  : F is copied to B
  //   otherwise: per lane ((F - B) * mult + (B shl 8) + bias) shr 8, with mult
  //              read from alpha_ptr + W * 16 and bias read through bias_ptr
  //
  // The x64 copy path stores ECX (32 bits). B is a single TColor32, so a
  // 64-bit store of RCX would also overwrite the four bytes that follow it.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = F, EDX = address of B, ECX = W
          JCXZ      @Done           // W = 0 (NOTE(review): JCXZ kept from the original - confirm JECXZ was not intended)
          CMP       ECX,$FF
          JZ        @Copy
          MOVD      MM1,EAX
          PXOR      MM0,MM0
          SHL       ECX,4           // table entries are 16 bytes apart
          MOVD      MM2,[EDX]
          PUNPCKLBW MM1,MM0         // F as four words
          PUNPCKLBW MM2,MM0         // B as four words
          ADD       ECX,alpha_ptr
          PSUBW     MM1,MM2         // F - B
          PMULLW    MM1,[ECX]       // * weight
          PSLLW     MM2,8           // B * 256
          MOV       ECX,bias_ptr
          PADDW     MM2,[ECX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDX],MM1
          JMP       @Done
  @Copy:
          MOV       [EDX],EAX
  @Done:
  {$ENDIF}
  {$IFDEF TARGET_x64}
    // in: ECX = F, RDX = address of B, R8D = W
          TEST      R8D,R8D
          JZ        @Done           // W = 0, B stays as it is
          CMP       R8D,$FF
          JZ        @Copy
          MOVD      MM1,ECX
          PXOR      MM0,MM0
          SHL       R8D,4           // table entries are 16 bytes apart
          MOVD      MM2,[RDX]
          PUNPCKLBW MM1,MM0         // F as four words
          PUNPCKLBW MM2,MM0         // B as four words
  {$IFNDEF FPC}
          ADD       R8,alpha_ptr
  {$ELSE}
          ADD       R8,[RIP+alpha_ptr]
  {$ENDIF}
          PSUBW     MM1,MM2         // F - B
          PMULLW    MM1,[R8]        // * weight
          PSLLW     MM2,8           // B * 256
  {$IFNDEF FPC}
          MOV       RAX,bias_ptr
  {$ELSE}
          MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
  {$ENDIF}
          PADDW     MM2,[RAX]       // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [RDX],MM1
          JMP       @Done
  @Copy:
          MOV       [RDX],ECX       // 32-bit store: exactly one TColor32
  @Done:
  {$ENDIF}
  end;
  584. {$IFDEF TARGET_x86}
  procedure CombineLine_MMX(Src, Dst: PColor32; Count: Integer; W: Cardinal); {$IFDEF FPC} assembler; {$ENDIF}
  // Mixes Count pixels from Src into the pixels at Dst with a constant weight
  // W (x86 only; the block is wrapped in TARGET_x86).
  //   W = 0    : destination left untouched
  //   W = $FF  : source copied via GR32_LowLevel.MoveLongword
  //   otherwise: per lane ((S - D) * mult + (D shl 8) + bias) shr 8, with mult
  //              read from alpha_ptr + W * 16 and bias read through bias_ptr
  //
  // Every exit path falls through to the compiler-generated epilogue, which
  // restores the frame and removes the stack parameter W. Hand-written
  // "POP EBP / RET 4" exits are avoided because they only balance the stack
  // on paths that match the prologue the compiler actually emitted.
  // Returns immediately when Count <= 0.
  // "nostackframe" is not requested for FPC because W is addressed after a
  // PUSH.
  // NOTE(review): assumes FPC builds a frame for an assembler routine with a
  // stack parameter when nostackframe is absent - confirm against FPC docs.
  asm
    // in: EAX = Src, EDX = Dst, ECX = Count, W on the stack
          TEST      ECX,ECX
          JLE       @Exit           // zero or negative count

          PUSH      EBX
          MOV       EBX,W
          TEST      EBX,EBX
          JZ        @Restore        // weight is zero
          CMP       EBX,$FF
          JZ        @Copy           // weight = 255 => copy src to dst

          SHL       EBX,4           // table entries are 16 bytes apart
          ADD       EBX,alpha_ptr
          MOVQ      MM3,[EBX]       // MM3 <- weight multiplier, loop invariant
          MOV       EBX,bias_ptr
          MOVQ      MM4,[EBX]       // MM4 <- bias, loop invariant
          PXOR      MM0,MM0         // MM0 <- 0, never written inside the loop

  @Pixel:
          MOVD      MM1,[EAX]
          MOVD      MM2,[EDX]
          PUNPCKLBW MM1,MM0         // source as four words
          PUNPCKLBW MM2,MM0         // destination as four words
          PSUBW     MM1,MM2         // S - D
          PMULLW    MM1,MM3         // * weight
          PSLLW     MM2,8           // D * 256
          PADDW     MM2,MM4         // + bias
          PADDW     MM1,MM2
          PSRLW     MM1,8
          PACKUSWB  MM1,MM0
          MOVD      [EDX],MM1
          ADD       EAX,4
          ADD       EDX,4
          DEC       ECX
          JNZ       @Pixel
          JMP       @Restore

  @Copy:
          CALL      GR32_LowLevel.MoveLongword  // EAX = Src, EDX = Dst, ECX = Count still hold the arguments
  @Restore:
          POP       EBX
  @Exit:
  end;
  628. {$ENDIF}
  procedure EMMS_MMX; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Executes the EMMS instruction and nothing else. The other routines in
  // this unit use MMX registers without issuing EMMS themselves.
  asm
          EMMS
  end;
  function LightenReg_MMX(C: TColor32; Amount: Integer): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Adds Amount to (Amount >= 0) or subtracts -Amount from (Amount < 0) the
  // three low bytes of C using unsigned byte saturation. The magnitude is
  // replicated into those bytes by multiplying with $010101.
  // NOTE(review): the replication only stays inside the three low bytes for
  // magnitudes up to 255 - the code does not clamp Amount.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C, EDX = Amount      out: EAX
          MOVD      MM0,EAX
          TEST      EDX,EDX
          JL        @Darken
          IMUL      EDX,$010101     // Amount in each of the three low bytes
          MOVD      MM1,EDX
          PADDUSB   MM0,MM1         // saturating add
          JMP       @Fetch
  @Darken:
          NEG       EDX
          IMUL      EDX,$010101     // -Amount in each of the three low bytes
          MOVD      MM1,EDX
          PSUBUSB   MM0,MM1         // saturating subtract
  @Fetch:
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C, EDX = Amount      out: EAX
          MOVD      MM0,ECX
          TEST      EDX,EDX
          JL        @Darken
          IMUL      EDX,$010101     // Amount in each of the three low bytes
          MOVD      MM1,EDX
          PADDUSB   MM0,MM1         // saturating add
          JMP       @Fetch
  @Darken:
          NEG       EDX
          IMUL      EDX,$010101     // -Amount in each of the three low bytes
          MOVD      MM1,EDX
          PSUBUSB   MM0,MM1         // saturating subtract
  @Fetch:
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  666. { MMX Color algebra versions }
  function ColorAdd_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns C1 + C2 computed independently per byte with unsigned saturation
  // (each byte is clamped at $FF).
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,EAX
          PADDUSB   MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,ECX
          PADDUSB   MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorSub_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns C1 - C2 computed independently per byte with unsigned saturation
  // (each byte is clamped at 0).
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,EAX
          PSUBUSB   MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,ECX
          PSUBUSB   MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorModulate_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Multiplies C1 and C2 byte by byte and returns (C1 * C2) shr 8 per byte.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          PXOR      MM2,MM2
          MOVD      MM0,EAX
          MOVD      MM1,EDX
          PUNPCKLBW MM0,MM2         // C1 as four words
          PUNPCKLBW MM1,MM2         // C2 as four words
          PMULLW    MM0,MM1
          PSRLW     MM0,8
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          PXOR      MM2,MM2
          MOVD      MM0,ECX
          MOVD      MM1,EDX
          PUNPCKLBW MM0,MM2         // C1 as four words
          PUNPCKLBW MM1,MM2         // C2 as four words
          PMULLW    MM0,MM1
          PSRLW     MM0,8
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorMax_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte unsigned maximum of C1 and C2. Uses PMAXUB; this
  // unit registers the routine under the ciEMMX feature flag, not ciMMX.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,EAX
          PMAXUB    MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,ECX
          PMAXUB    MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorMin_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte unsigned minimum of C1 and C2. Uses PMINUB; this
  // unit registers the routine under the ciEMMX feature flag, not ciMMX.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,EAX
          PMINUB    MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          MOVD      MM1,EDX
          MOVD      MM0,ECX
          PMINUB    MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorDifference_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Returns the per-byte absolute difference of C1 and C2. Both saturating
  // differences (C1 - C2 and C2 - C1) are computed; for every byte one of them
  // is zero, so OR-ing them yields |C1 - C2|.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          MOVD      MM0,EAX
          MOVD      MM1,EDX
          MOVQ      MM2,MM1         // keep a copy of C2
          PSUBUSB   MM1,MM0         // MM1 <- C2 - C1, clamped at 0
          PSUBUSB   MM0,MM2         // MM0 <- C1 - C2, clamped at 0
          POR       MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          MOVD      MM0,ECX
          MOVD      MM1,EDX
          MOVQ      MM2,MM1         // keep a copy of C2
          PSUBUSB   MM1,MM0         // MM1 <- C2 - C1, clamped at 0
          PSUBUSB   MM0,MM2         // MM0 <- C1 - C2, clamped at 0
          POR       MM0,MM1
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorExclusion_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Per byte computes C1 + C2 - ((C1 * C2) shr 7). The subtraction saturates
  // at 0 and the final pack saturates at $FF.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C1, EDX = C2      out: EAX
          PXOR      MM2,MM2
          MOVD      MM0,EAX
          MOVD      MM1,EDX
          PUNPCKLBW MM0,MM2         // C1 as four words
          PUNPCKLBW MM1,MM2         // C2 as four words
          MOVQ      MM3,MM1
          PMULLW    MM3,MM0         // MM3 <- C1 * C2
          PADDW     MM0,MM1         // MM0 <- C1 + C2
          PSRLW     MM3,7           // MM3 <- (C1 * C2) shr 7
          PSUBUSW   MM0,MM3
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C1, EDX = C2      out: EAX
          PXOR      MM2,MM2
          MOVD      MM0,ECX
          MOVD      MM1,EDX
          PUNPCKLBW MM0,MM2         // C1 as four words
          PUNPCKLBW MM1,MM2         // C2 as four words
          MOVQ      MM3,MM1
          PMULLW    MM3,MM0         // MM3 <- C1 * C2
          PADDW     MM0,MM1         // MM0 <- C1 + C2
          PSRLW     MM3,7           // MM3 <- (C1 * C2) shr 7
          PSUBUSW   MM0,MM3
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  function ColorScale_MMX(C: TColor32; W: Cardinal): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  // Scales every byte of C by the weight W: each lane is (C * mult) shr 8,
  // with mult read from alpha_ptr + W * 16 (table declared outside this unit).
  //
  // On x64 W is a 32-bit argument, so only EDX is defined on entry. The index
  // is shifted as EDX, which zero-extends into RDX, before the table base is
  // added; shifting RDX would fold undefined upper bits into the address.
  asm
  {$IFDEF TARGET_X86}
    // in: EAX = C, EDX = W      out: EAX
          PXOR      MM2,MM2
          SHL       EDX,4           // table entries are 16 bytes apart
          MOVD      MM0,EAX
          PUNPCKLBW MM0,MM2         // C as four words
          ADD       EDX,alpha_ptr
          PMULLW    MM0,[EDX]       // * weight
          PSRLW     MM0,8
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  {$IFDEF TARGET_X64}
    // in: ECX = C, EDX = W      out: EAX
          PXOR      MM2,MM2
          SHL       EDX,4           // 32-bit shift clears the upper half of RDX
          MOVD      MM0,ECX
          PUNPCKLBW MM0,MM2         // C as four words
  {$IFNDEF FPC}
          ADD       RDX,alpha_ptr
  {$ELSE}
          ADD       RDX,[RIP+alpha_ptr]
  {$ENDIF}
          PMULLW    MM0,[RDX]       // * weight
          PSRLW     MM0,8
          PACKUSWB  MM0,MM2
          MOVD      EAX,MM0
  {$ENDIF}
  end;
  procedure RegisterBindingFunctions;
  // Adds every routine of this unit to BlendRegistry under its function id,
  // with the CPU feature it requires and the shared priority
  // BlendRegistryPriorityMMX. Nothing is registered when PUREPASCAL or
  // OMIT_MMX is defined.
  // NOTE(review): BlendMem_MMX, BlendRegEx_MMX, BlendLine_MMX, BlendLineEx_MMX
  // and CombineLine_MMX are implemented only under TARGET_x86 in this unit but
  // are referenced here unconditionally - confirm non-x86 targets always
  // define OMIT_MMX.
  begin
  {$IFNDEF PUREPASCAL}
  {$IFNDEF OMIT_MMX}
    // state handling and combine
    BlendRegistry.Add(FID_EMMS,            @EMMS_MMX,            [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COMBINEREG,      @CombineReg_MMX,      [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COMBINEMEM,      @CombineMem_MMX,      [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COMBINELINE,     @CombineLine_MMX,     [ciMMX],  0, BlendRegistryPriorityMMX);
    // alpha blending
    BlendRegistry.Add(FID_BLENDREG,        @BlendReg_MMX,        [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_BLENDMEM,        @BlendMem_MMX,        [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_BLENDREGEX,      @BlendRegEx_MMX,      [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_BLENDMEMEX,      @BlendMemEx_MMX,      [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_BLENDLINE,       @BlendLine_MMX,       [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_BLENDLINEEX,     @BlendLineEx_MMX,     [ciMMX],  0, BlendRegistryPriorityMMX);
    // colour algebra (max/min need the extended MMX feature flag)
    BlendRegistry.Add(FID_COLORMAX,        @ColorMax_EMMX,       [ciEMMX], 0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLORMIN,        @ColorMin_EMMX,       [ciEMMX], 0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLORADD,        @ColorAdd_MMX,        [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLORSUB,        @ColorSub_MMX,        [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLORMODULATE,   @ColorModulate_MMX,   [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLORDIFFERENCE, @ColorDifference_MMX, [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLOREXCLUSION,  @ColorExclusion_MMX,  [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_COLORSCALE,      @ColorScale_MMX,      [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_LIGHTEN,         @LightenReg_MMX,      [ciMMX],  0, BlendRegistryPriorityMMX);
    // per-channel weighted blending
    BlendRegistry.Add(FID_BLENDREGRGB,     @BlendRegRGB_MMX,     [ciMMX],  0, BlendRegistryPriorityMMX);
    BlendRegistry.Add(FID_BLENDMEMRGB,     @BlendMemRGB_MMX,     [ciMMX],  0, BlendRegistryPriorityMMX);
  {$ENDIF}
  {$ENDIF}
  end;
  861. initialization
  862. RegisterBindingFunctions;
  863. end.