diff options
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/vpx_dsp.mk | 1 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 10 | ||||
-rw-r--r-- | vpx_dsp/x86/sad_mmx.asm | 427 |
3 files changed, 5 insertions, 433 deletions
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index dd8c6e831..43802d76d 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -285,7 +285,6 @@ DSP_SRCS-$(HAVE_NEON) += arm/subtract_neon.c DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c DSP_SRCS-$(HAVE_MSA) += mips/subtract_msa.c -DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index f883ce553..219aace4b 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -983,16 +983,16 @@ add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc"; @@ -1001,7 +1001,7 @@ add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc"; # # Avg diff --git a/vpx_dsp/x86/sad_mmx.asm b/vpx_dsp/x86/sad_mmx.asm deleted file mode 100644 index 9968992bd..000000000 --- a/vpx_dsp/x86/sad_mmx.asm +++ /dev/null @@ -1,427 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -global sym(vpx_sad16x16_mmx) PRIVATE -global sym(vpx_sad8x16_mmx) PRIVATE -global sym(vpx_sad8x8_mmx) PRIVATE -global sym(vpx_sad4x4_mmx) PRIVATE -global sym(vpx_sad16x8_mmx) PRIVATE - -;unsigned int vpx_sad16x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad16x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] - - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpcklbw mm2, mm6 - - punpckhbw mm1, mm6 - punpckhbw mm3, mm6 - - paddw mm0, mm2 - paddw mm1, mm3 - - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - paddw mm7, mm1 - - cmp rsi, rcx - jne .x16x16sad_mmx_loop - - - movq mm0, mm7 - - punpcklwd mm0, mm6 - punpckhwd mm7, mm6 - - paddw mm0, mm7 - movq mm7, mm0 - - - psrlq mm0, 32 - paddw mm7, mm0 - - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad8x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad8x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - paddw mm7, mm2 - cmp rsi, rcx - - jne .x8x16sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad8x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x8sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - paddw mm0, mm2 - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - cmp rsi, rcx - - jne .x8x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad4x4_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - pxor mm3, mm3 - - punpcklbw mm0, mm3 - punpckhbw mm2, mm3 - - paddw mm0, mm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movd mm4, DWORD PTR [rsi] - movd mm5, DWORD PTR [rdi] - - movd mm6, DWORD PTR [rsi+rax] - movd mm7, DWORD PTR [rdi+rdx] - - punpcklbw mm4, mm6 - punpcklbw mm5, mm7 - - movq mm6, mm4 - psubusb mm4, mm5 - - psubusb mm5, mm6 - por mm4, mm5 - - movq mm5, mm4 - punpcklbw mm4, mm3 - - punpckhbw mm5, mm3 - paddw mm4, mm5 - - paddw mm0, mm4 - movq mm1, mm0 - - punpcklwd mm0, mm3 - punpckhwd mm1, mm3 - - paddw mm0, mm1 - movq mm1, mm0 - - psrlq mm0, 32 - paddw mm0, mm1 - - movq rax, mm0 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad16x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad16x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x8sad_mmx_loop: - - movq mm0, [rsi] - movq mm1, [rdi] - - movq mm2, [rsi+8] - movq mm3, [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpckhbw mm1, mm6 - - punpcklbw mm2, mm6 - punpckhbw mm3, mm6 - - - paddw mm0, mm2 - paddw mm1, mm3 - - paddw mm0, mm1 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - cmp rsi, rcx - jne .x16x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret |