add_noise_sse2.asm 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  ;-----------------------------------------------------------------------
  ; void vpx_plane_add_noise_sse2(uint8_t *start, const int8_t *noise,
  ;                               int blackclamp, int whiteclamp,
  ;                               int width, int height, int pitch)
  ;
  ; Clamps every byte of an image plane and then adds a run of noise bytes
  ; to it, in place, one row at a time.
  ;
  ; Arguments (read through the arg(n) macro after SHADOW_ARGS_TO_STACK):
  ;   arg(0) start      - first byte of the current row; advanced by pitch
  ;                       after each row (the shadowed copy is modified)
  ;   arg(1) noise      - base of the noise table
  ;   arg(2) blackclamp - lower clamp amount
  ;   arg(3) whiteclamp - upper clamp amount
  ;   arg(4) width      - bytes per row to process
  ;   arg(5) height     - number of rows; used as the row counter
  ;   arg(6) pitch      - byte distance between consecutive rows
  ;
  ; Per row:
  ;   1. LIBVPX_RAND is called and the low 8 bits of its result select a
  ;      start offset (0..255) into the noise table.
  ;   2. Each 16-byte group p of the row is replaced by
  ;        ((p -sat black) +sat (black +sat white)) -sat white, then + noise
  ;      where +sat/-sat are unsigned saturating byte ops and the final
  ;      noise add wraps modulo 256.  When black + white <= 255 the
  ;      saturating sequence equals clamp(p, black, 255 - white).
  ;
  ; Behaviour worth knowing (all visible in the loop structure below):
  ;   - Rows are processed in whole 16-byte groups, so up to
  ;     ((width + 15) & ~15) bytes per row are read AND written, and at
  ;     least 16 bytes even when width <= 0.
  ;   - Up to 255 + ((width + 15) & ~15) bytes of the noise table are read.
  ;   - Both loops test their condition at the bottom, so one row is
  ;     always processed even when height <= 0.
  ;
  ; Registers used: rax, rcx, rdx, rsi, rdi, xmm1-xmm5.  rsi/rdi are saved
  ; and restored here; rbx is handled by GET_GOT / RESTORE_GOT.
  ;-----------------------------------------------------------------------
global sym(vpx_plane_add_noise_sse2) PRIVATE
sym(vpx_plane_add_noise_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi

.addnoise_loop:
    ; NOTE(review): no 32-byte home/shadow area is reserved below rsp
    ; before this call - confirm whether the Win64 build requires one.
    call        sym(LIBVPX_RAND) WRT_PLT
    and         rax, 0xff               ; keep low 8 bits: offset 0..255
    mov         rdi, arg(1)             ; noise
    add         rdi, rax                ; rdi = noise + random offset

    ; The clamp vectors are rebuilt on every row, AFTER the call above.
    ; xmm3-xmm5 are call-clobbered registers in the x86-64 calling
    ; conventions, so values placed in them before the call are not
    ; guaranteed to survive it.
    mov         rdx, 0x01010101
    mov         rax, arg(2)             ; blackclamp
    mul         rdx                     ; replicate low byte into 4 bytes
    movd        xmm3, rax
    pshufd      xmm3, xmm3, 0           ; xmm3 = 16 copies of blackclamp byte

    mov         rdx, 0x01010101         ; mul overwrote rdx; reload
    mov         rax, arg(3)             ; whiteclamp
    mul         rdx
    movd        xmm4, rax
    pshufd      xmm4, xmm4, 0           ; xmm4 = 16 copies of whiteclamp byte

    movdqu      xmm5, xmm3
    paddusb     xmm5, xmm4              ; xmm5 = black + white (saturating)

    movsxd      rcx, dword arg(4)       ; width
    mov         rsi, arg(0)             ; current row
    xor         rax, rax                ; rax = byte offset within the row

.addnoise_nextset:
    movdqu      xmm1, [rsi+rax]         ; 16 source bytes

    psubusb     xmm1, xmm3              ; subtract black clamp
    paddusb     xmm1, xmm5              ; add both clamps
    psubusb     xmm1, xmm4              ; subtract white clamp

    movdqu      xmm2, [rdi+rax]         ; 16 noise bytes for this position
    paddb       xmm1, xmm2              ; add noise (wrapping byte add)
    movdqu      [rsi+rax], xmm1         ; store the result in place

    add         rax, 16                 ; advance to the next 16 bytes
    cmp         rax, rcx
    jl          .addnoise_nextset       ; signed: continue while offset < width

    movsxd      rax, dword arg(6)       ; pitch
    add         arg(0), rax             ; start += pitch
    sub         dword arg(5), 1         ; height -= 1
    jg          .addnoise_loop          ; loop while rows remain

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
  ; Constant tables, placed in the section selected by the SECTION_RODATA
  ; macro and aligned to 16 bytes.  Neither label is referenced by any
  ; instruction in the visible part of this file.
SECTION_RODATA
align 16

; rd42: eight 16-bit words, each holding 4 (16 bytes total).
rd42:
    dw 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04

; four8s: four 32-bit dwords, each holding 8 (16 bytes total).
four8s:
    dd 8, 8, 8, 8