recon_mmx.asm 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  ;-----------------------------------------------------------------------
  ; void vp8_copy_mem8x8_mmx(unsigned char *src, int src_stride,
  ;                          unsigned char *dst, int dst_stride)
  ;
  ; Copies an 8-byte-wide, 8-row block from src to dst using 64-bit MMX
  ; moves. Consecutive rows are src_stride / dst_stride bytes apart.
  ;
  ; In:    arg(0) = src, arg(1) = src_stride (sign-extended from 32 bits),
  ;        arg(2) = dst, arg(3) = dst_stride (sign-extended from 32 bits)
  ; Regs:  rsi = source row pointer, rdi = destination row pointer,
  ;        rax = src_stride, rcx = dst_stride
  ; Writes rax, rcx, mm0-mm5 and the flags; rsi/rdi are saved and restored.
  ; NOTE(review): no emms is executed here -- presumably the caller is
  ;               expected to clear MMX state; confirm against callers.
  ;-----------------------------------------------------------------------
  global sym(vp8_copy_mem8x8_mmx) PRIVATE
  sym(vp8_copy_mem8x8_mmx):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 4              ; presumably makes arg(n) addressable -- see x86_abi_support.asm
      push        rsi
      push        rdi
      ; end prolog

      ; Pick up all four arguments before touching the pixel data.
      mov         rsi, arg(0)             ; rsi = src
      mov         rdi, arg(2)             ; rdi = dst
      movsxd      rax, dword ptr arg(1)   ; rax = (int64) src_stride
      movsxd      rcx, dword ptr arg(3)   ; rcx = (int64) dst_stride

      ; Rows 0..2: read three rows, then write them.
      movq        mm0, [rsi]
      movq        mm1, [rsi+rax]
      movq        mm2, [rsi+rax*2]
      movq        [rdi], mm0
      movq        [rdi+rcx], mm1
      movq        [rdi+rcx*2], mm2

      ; Step both pointers forward by three rows (2*stride + stride).
      lea         rsi, [rsi+rax*2]
      add         rsi, rax
      lea         rdi, [rdi+rcx*2]
      add         rdi, rcx

      ; Rows 3..5.
      movq        mm3, [rsi]
      movq        mm4, [rsi+rax]
      movq        mm5, [rsi+rax*2]
      movq        [rdi], mm3
      movq        [rdi+rcx], mm4
      movq        [rdi+rcx*2], mm5

      ; Step forward by two rows only; the last two rows are then reached
      ; at offsets of one and two strides from the new base (rows 6 and 7).
      lea         rsi, [rsi+rax*2]
      lea         rdi, [rdi+rcx*2]

      ; Rows 6..7.
      movq        mm0, [rsi+rax]
      movq        mm1, [rsi+rax*2]
      movq        [rdi+rcx], mm0
      movq        [rdi+rcx*2], mm1

      ; begin epilog
      pop         rdi
      pop         rsi
      UNSHADOW_ARGS
      pop         rbp
      ret
  ;-----------------------------------------------------------------------
  ; void vp8_copy_mem8x4_mmx(unsigned char *src, int src_stride,
  ;                          unsigned char *dst, int dst_stride)
  ;
  ; Copies an 8-byte-wide, 4-row block from src to dst using 64-bit MMX
  ; moves. Consecutive rows are src_stride / dst_stride bytes apart.
  ;
  ; In:    arg(0) = src, arg(1) = src_stride (sign-extended from 32 bits),
  ;        arg(2) = dst, arg(3) = dst_stride (sign-extended from 32 bits)
  ; Regs:  rsi = source row pointer, rdi = destination row pointer,
  ;        rax = src_stride, rcx = dst_stride
  ; Writes rax, rcx and mm0-mm3; rsi/rdi are saved and restored.
  ; NOTE(review): no emms is executed here -- presumably the caller is
  ;               expected to clear MMX state; confirm against callers.
  ;-----------------------------------------------------------------------
  global sym(vp8_copy_mem8x4_mmx) PRIVATE
  sym(vp8_copy_mem8x4_mmx):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 4              ; presumably makes arg(n) addressable -- see x86_abi_support.asm
      push        rsi
      push        rdi
      ; end prolog

      ; Pick up all four arguments before touching the pixel data.
      mov         rsi, arg(0)             ; rsi = src
      mov         rdi, arg(2)             ; rdi = dst
      movsxd      rax, dword ptr arg(1)   ; rax = (int64) src_stride
      movsxd      rcx, dword ptr arg(3)   ; rcx = (int64) dst_stride

      ; Rows 0..2: read three rows, then write them.
      movq        mm0, [rsi]
      movq        mm1, [rsi+rax]
      movq        mm2, [rsi+rax*2]
      movq        [rdi], mm0
      movq        [rdi+rcx], mm1
      movq        [rdi+rcx*2], mm2

      ; Step both pointers forward by two rows; row 3 is then one stride
      ; past the new base.
      lea         rsi, [rsi+rax*2]
      lea         rdi, [rdi+rcx*2]

      ; Row 3.
      movq        mm3, [rsi+rax]
      movq        [rdi+rcx], mm3

      ; begin epilog
      pop         rdi
      pop         rsi
      UNSHADOW_ARGS
      pop         rbp
      ret