encodeopt.asm 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  ;------------------------------------------------------------------------
  ; int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr)
  ;
  ; Sum of squared differences over 16 signed 16-bit values read from each
  ; pointer (two 16-byte loads per pointer).
  ;
  ; In:   arg(0) = coeff_ptr, arg(1) = dcoef_ptr. Both are read with
  ;       movdqa, so each must be 16-byte aligned.
  ; Out:  rax = the sum. It is accumulated in 32-bit lanes, and the
  ;       reduction leaves zero above bit 31 of the value moved out.
  ; Uses: rsi, rdi (pushed/popped here), xmm0-xmm3, xmm5.
  ;------------------------------------------------------------------------
  global sym(vp8_block_error_sse2) PRIVATE
  sym(vp8_block_error_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 2
      push        rsi
      push        rdi
      ; end prolog

      mov         rsi, arg(0)             ; rsi = coeff_ptr
      mov         rdi, arg(1)             ; rdi = dcoef_ptr

      ; Words 0-7: difference, then square and add adjacent pairs.
      movdqa      xmm0, [rsi]
      movdqa      xmm1, [rdi]
      psubw       xmm0, xmm1
      pmaddwd     xmm0, xmm0              ; 4 dwords: d0^2+d1^2, d2^2+d3^2, ...

      ; Words 8-15: same treatment.
      movdqa      xmm2, [rsi + 16]
      movdqa      xmm3, [rdi + 16]
      psubw       xmm2, xmm3
      pmaddwd     xmm2, xmm2

      paddd       xmm0, xmm2              ; xmm0 = four partial sums

      ; Horizontal add of the four dwords. Interleaving with zero keeps
      ; every odd dword at 0, so the low qword ends up as a zero-extended
      ; 32-bit total.
      movdqa      xmm1, xmm0
      pxor        xmm5, xmm5
      punpckhdq   xmm1, xmm5              ; xmm1 = { s2, 0, s3, 0 }
      punpckldq   xmm0, xmm5              ; xmm0 = { s0, 0, s1, 0 }
      paddd       xmm0, xmm1              ; { s0+s2, 0, s1+s3, 0 }

      movdqa      xmm1, xmm0
      psrldq      xmm0, 8                 ; bring the upper qword down
      paddd       xmm0, xmm1              ; low dword = s0+s1+s2+s3

      movq        rax, xmm0

      pop         rdi
      pop         rsi
      ; begin epilog
      UNSHADOW_ARGS
      pop         rbp
      ret
  ;------------------------------------------------------------------------
  ; int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
  ;
  ; Sum of squared differences over 16 groups of 16 signed 16-bit values
  ; (32 bytes per group, 512 bytes read from each pointer). A mask derived
  ; from dc can drop the leading word(s) of every group from the sum.
  ;
  ; In:   arg(0) = coeff_ptr, arg(1) = dcoef_ptr (read with movdqa, so
  ;       both must be 16-byte aligned), arg(2) = dc.
  ; Out:  rax = the sum. It is accumulated in 32-bit lanes, and the
  ;       reduction leaves zero above bit 31 of the value moved out.
  ; Uses: rsi, rdi (pushed/popped here), rcx, xmm0-xmm6, flags.
  ;       xmm6 is bracketed by SAVE_XMM 6 / RESTORE_XMM; presumably that
  ;       preserves it where the target ABI requires - see the macro
  ;       definitions in x86_abi_support.asm.
  ;------------------------------------------------------------------------
  global sym(vp8_mbblock_error_sse2_impl) PRIVATE
  sym(vp8_mbblock_error_sse2_impl):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 3
      SAVE_XMM 6
      push        rsi
      push        rdi
      ; end prolog

      mov         rsi, arg(0)             ; rsi = coeff_ptr
      mov         rdi, arg(1)             ; rdi = dcoef_ptr

      pxor        xmm4, xmm4              ; xmm4 = running sum (4 dwords)
      pxor        xmm6, xmm6              ; xmm6 = zero, used for compare/unpack

      ; Build the per-word keep mask in xmm5. movd places dc in the low
      ; dword and clears the rest. After the compare against zero:
      ;   word 0 = 0xFFFF if the low  16 bits of dc are 0, else 0
      ;   word 1 = 0xFFFF if the high 16 bits of dc are 0, else 0
      ;   words 2-7 = 0xFFFF
      ; NOTE(review): callers presumably pass dc as 0 or 1, so that only
      ; word 0 is ever dropped - confirm against the C wrapper.
      movd        xmm5, dword ptr arg(2)  ; dc
      por         xmm5, xmm4              ; xmm4 is zero here: xmm5 unchanged
      pcmpeqw     xmm5, xmm6

      mov         rcx, 16                 ; groups remaining

  .next_group:
      ; Words 0-7 of the group: difference, mask, square-and-pair-add.
      movdqa      xmm0, [rsi]
      movdqa      xmm1, [rdi]
      psubw       xmm0, xmm1
      pand        xmm0, xmm5
      pmaddwd     xmm0, xmm0

      ; Words 8-15 of the group: never masked.
      movdqa      xmm2, [rsi + 16]
      movdqa      xmm3, [rdi + 16]
      psubw       xmm2, xmm3
      pmaddwd     xmm2, xmm2

      paddd       xmm4, xmm2
      paddd       xmm4, xmm0

      add         rsi, 32
      add         rdi, 32
      sub         rcx, 1
      jnz         .next_group

      ; Horizontal add of the four dwords in xmm4. Interleaving with the
      ; zero register keeps every odd dword at 0, so the low qword ends up
      ; as a zero-extended 32-bit total.
      movdqa      xmm0, xmm4
      punpckhdq   xmm4, xmm6              ; xmm4 = { s2, 0, s3, 0 }
      punpckldq   xmm0, xmm6              ; xmm0 = { s0, 0, s1, 0 }
      paddd       xmm0, xmm4              ; { s0+s2, 0, s1+s3, 0 }

      movdqa      xmm1, xmm0
      psrldq      xmm0, 8                 ; bring the upper qword down
      paddd       xmm0, xmm1              ; low dword = s0+s1+s2+s3

      movq        rax, xmm0

      pop         rdi
      pop         rsi
      ; begin epilog
      RESTORE_XMM
      UNSHADOW_ARGS
      pop         rbp
      ret
  ;------------------------------------------------------------------------
  ; int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr);
  ;
  ; Sum of squared differences over 128 signed 16-bit values read from
  ; each pointer (16 iterations of one 16-byte load per pointer).
  ;
  ; In:   arg(0) = s_ptr, arg(1) = d_ptr. Both are read with movdqa, so
  ;       each must be 16-byte aligned.
  ; Out:  rax = the sum. It is accumulated in 32-bit lanes, and the
  ;       reduction leaves zero above bit 31 of the value moved out.
  ; Uses: rsi, rdi (pushed/popped here), rcx, xmm0-xmm3, flags.
  ;------------------------------------------------------------------------
  global sym(vp8_mbuverror_sse2_impl) PRIVATE
  sym(vp8_mbuverror_sse2_impl):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 2
      push        rsi
      push        rdi
      ; end prolog

      mov         rsi, arg(0)             ; rsi = s_ptr
      mov         rdi, arg(1)             ; rdi = d_ptr

      pxor        xmm3, xmm3              ; xmm3 = running sum (4 dwords)
      mov         rcx, 16                 ; 16-byte rows remaining

  .next_row:
      movdqa      xmm1, [rsi]
      movdqa      xmm2, [rdi]
      psubw       xmm1, xmm2              ; 8 word differences
      pmaddwd     xmm1, xmm1              ; square and add adjacent pairs
      paddd       xmm3, xmm1

      add         rsi, 16
      add         rdi, 16
      dec         rcx
      jnz         .next_row

      ; Horizontal add of the four dwords in xmm3. Interleaving with zero
      ; keeps every odd dword at 0, so the low qword ends up as a
      ; zero-extended 32-bit total.
      movdqa      xmm1, xmm3
      movdqa      xmm2, xmm3
      pxor        xmm0, xmm0
      punpckldq   xmm1, xmm0              ; xmm1 = { s0, 0, s1, 0 }
      punpckhdq   xmm2, xmm0              ; xmm2 = { s2, 0, s3, 0 }
      paddd       xmm1, xmm2              ; { s0+s2, 0, s1+s3, 0 }

      movdqa      xmm2, xmm1
      psrldq      xmm1, 8                 ; bring the upper qword down
      paddd       xmm1, xmm2              ; low dword = s0+s1+s2+s3

      movq        rax, xmm1

      pop         rdi
      pop         rsi
      ; begin epilog
      UNSHADOW_ARGS
      pop         rbp
      ret