diff options
author | Johann <johannkoenig@google.com> | 2016-09-29 15:21:26 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2016-09-29 15:25:27 -0700 |
commit | 721354fe7f549bfeaa35e1adf9ff746424f2aa0e (patch) | |
tree | 0e6d94f5ff59896e5067a065037c28c04181c87e /vp8/encoder | |
parent | 2663b092aec94c5dc9cf2cc9d52f9752a31aa699 (diff) | |
download | libvpx-721354fe7f549bfeaa35e1adf9ff746424f2aa0e.tar libvpx-721354fe7f549bfeaa35e1adf9ff746424f2aa0e.tar.gz libvpx-721354fe7f549bfeaa35e1adf9ff746424f2aa0e.tar.bz2 libvpx-721354fe7f549bfeaa35e1adf9ff746424f2aa0e.zip |
vp8: remove mmx functions
When they have sse2 equivalents.
Change-Id: I158f631a3bcecba57b36093ac10114b1904767a7
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/x86/dct_mmx.asm | 241 | ||||
-rw-r--r-- | vp8/encoder/x86/encodeopt.asm | 200 | ||||
-rw-r--r-- | vp8/encoder/x86/vp8_enc_stubs_mmx.c | 20 |
3 files changed, 0 insertions, 461 deletions
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm deleted file mode 100644 index 6f188cb94..000000000 --- a/vp8/encoder/x86/dct_mmx.asm +++ /dev/null @@ -1,241 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) -global sym(vp8_short_fdct4x4_mmx) PRIVATE -sym(vp8_short_fdct4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ; input - mov rdi, arg(1) ; output - - movsxd rax, dword ptr arg(2) ;pitch - - lea rcx, [rsi + rax*2] - ; read the input data - movq mm0, [rsi] - movq mm1, [rsi + rax] - - movq mm2, [rcx] - movq mm4, [rcx + rax] - - ; transpose for the first stage - movq mm3, mm0 ; 00 01 02 03 - movq mm5, mm2 ; 20 21 22 23 - - punpcklwd mm0, mm1 ; 00 10 01 11 - punpckhwd mm3, mm1 ; 02 12 03 13 - - punpcklwd mm2, mm4 ; 20 30 21 31 - punpckhwd mm5, mm4 ; 22 32 23 33 - - movq mm1, mm0 ; 00 10 01 11 - punpckldq mm0, mm2 ; 00 10 20 30 - - punpckhdq mm1, mm2 ; 01 11 21 31 - - movq mm2, mm3 ; 02 12 03 13 - punpckldq mm2, mm5 ; 02 12 22 32 - - punpckhdq mm3, mm5 ; 03 13 23 33 - - ; mm0 0 - ; mm1 1 - ; mm2 2 - ; mm3 3 - - ; first stage - movq mm5, mm0 - movq mm4, mm1 - - paddw mm0, mm3 ; a1 = 0 + 3 - paddw mm1, mm2 ; b1 = 1 + 2 - - psubw mm4, mm2 ; c1 = 1 - 2 - psubw mm5, mm3 ; d1 = 0 - 3 - - psllw mm5, 3 - psllw mm4, 3 - - psllw mm0, 3 - psllw mm1, 3 - - ; output 0 and 2 - movq mm2, mm0 ; a1 - - paddw mm0, mm1 ; op[0] = a1 + b1 - psubw mm2, mm1 ; op[2] = a1 - b1 - - ; output 1 and 3 - ; interleave c1, d1 - movq mm1, mm5 ; d1 - punpcklwd mm1, mm4 ; c1 d1 - punpckhwd mm5, mm4 ; c1 d1 - - movq mm3, mm1 - movq mm4, mm5 - - pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd mm1, MMWORD PTR[GLOBAL(_14500)] - paddd mm4, MMWORD PTR[GLOBAL(_14500)] - paddd mm3, MMWORD PTR[GLOBAL(_7500)] - paddd mm5, MMWORD PTR[GLOBAL(_7500)] - - psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw mm1, mm4 ; op[1] - packssdw mm3, mm5 ; op[3] - - ; done with vertical - ; transpose for the second stage - movq mm4, mm0 ; 00 10 20 30 - movq mm5, mm2 ; 02 12 22 32 - - punpcklwd mm0, mm1 ; 00 01 10 11 - punpckhwd mm4, mm1 ; 20 21 30 31 - - punpcklwd mm2, mm3 ; 02 03 12 13 - punpckhwd mm5, mm3 ; 22 23 32 33 - - movq mm1, mm0 ; 00 01 10 11 - punpckldq mm0, mm2 ; 00 01 02 03 - - punpckhdq mm1, mm2 ; 01 22 12 13 - - movq mm2, mm4 ; 20 31 30 31 - punpckldq mm2, mm5 ; 20 21 22 23 - - punpckhdq mm4, mm5 ; 30 31 32 33 - - ; mm0 0 - ; mm1 1 - ; mm2 2 - ; mm3 4 - - movq mm5, mm0 - movq mm3, mm1 - - paddw mm0, mm4 ; a1 = 0 + 3 - paddw mm1, mm2 ; b1 = 1 + 2 - - psubw mm3, mm2 ; c1 = 1 - 2 - psubw mm5, mm4 ; d1 = 0 - 3 - - pxor mm6, mm6 ; zero out for compare - - pcmpeqw mm6, mm5 ; d1 != 0 - - pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper, - ; and keep bit 0 of lower - - ; output 0 and 2 - movq mm2, mm0 ; a1 - - paddw mm0, mm1 ; a1 + b1 - psubw mm2, mm1 ; a1 - b1 - - paddw mm0, MMWORD PTR[GLOBAL(_7w)] - paddw mm2, MMWORD PTR[GLOBAL(_7w)] - - psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4 - psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4 - - movq MMWORD PTR[rdi + 0 ], mm0 - movq MMWORD PTR[rdi + 16], mm2 - - ; output 1 and 3 - ; interleave c1, d1 - movq mm1, mm5 ; d1 - punpcklwd mm1, mm3 ; c1 d1 - punpckhwd mm5, mm3 ; c1 d1 - - movq mm3, mm1 - movq mm4, mm5 - - pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd mm1, MMWORD PTR[GLOBAL(_12000)] - paddd mm4, MMWORD PTR[GLOBAL(_12000)] - paddd mm3, MMWORD PTR[GLOBAL(_51000)] - paddd mm5, MMWORD PTR[GLOBAL(_51000)] - - psrad mm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - - packssdw mm1, mm4 ; op[4] - packssdw mm3, mm5 ; op[12] - - paddw mm1, mm6 ; op[4] += (d1!=0) - - movq MMWORD PTR[rdi + 8 ], mm1 - movq MMWORD PTR[rdi + 24], mm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 8 -_5352_2217: - dw 5352 - dw 2217 - dw 5352 - dw 2217 -align 8 -_2217_neg5352: - dw 2217 - dw -5352 - dw 2217 - dw -5352 -align 8 -_cmp_mask: - times 4 dw 1 -align 8 -_7w: - times 4 dw 7 -align 8 -_14500: - times 2 dd 14500 -align 8 -_7500: - times 2 dd 7500 -align 8 -_12000: - times 2 dd 12000 -align 8 -_51000: - times 2 dd 51000 diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm index 9c8780125..0297220ee 100644 --- a/vp8/encoder/x86/encodeopt.asm +++ b/vp8/encoder/x86/encodeopt.asm @@ -59,149 +59,6 @@ sym(vp8_block_error_sse2): pop rbp ret -;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) -global sym(vp8_block_error_mmx) PRIVATE -sym(vp8_block_error_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor mm7, mm7 - - mov rdi, arg(1) ;dcoef_ptr - movq mm3, [rsi] - - movq mm4, [rdi] - movq mm5, [rsi+8] - - movq mm6, [rdi+8] - pxor mm1, mm1 ; from movd mm1, dc ; dc =0 - - movq mm2, mm7 - psubw mm5, mm6 - - por mm1, mm2 - pmaddwd mm5, mm5 - - pcmpeqw mm1, mm7 - psubw mm3, mm4 - - pand mm1, mm3 - pmaddwd mm1, mm1 - - paddd mm1, mm5 - movq mm3, [rsi+16] - - movq mm4, [rdi+16] - movq mm5, [rsi+24] - - movq mm6, [rdi+24] - psubw mm5, mm6 - - pmaddwd mm5, mm5 - psubw mm3, mm4 - - pmaddwd mm3, mm3 - paddd mm3, mm5 - - paddd mm1, mm3 - movq mm0, mm1 - - psrlq mm1, 32 - paddd mm0, mm1 - - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -global sym(vp8_mbblock_error_mmx_impl) PRIVATE -sym(vp8_mbblock_error_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor mm7, mm7 - - mov rdi, arg(1) ;dcoef_ptr - pxor mm2, mm2 - - movd mm1, dword ptr arg(2) ;dc - por mm1, mm2 - - pcmpeqw mm1, mm7 - mov rcx, 16 - -.mberror_loop_mmx: - movq mm3, [rsi] - movq mm4, [rdi] - - movq mm5, [rsi+8] - movq mm6, [rdi+8] - - - psubw mm5, mm6 - pmaddwd mm5, mm5 - - psubw mm3, mm4 - pand mm3, mm1 - - pmaddwd mm3, mm3 - paddd mm2, mm5 - - paddd mm2, mm3 - movq mm3, [rsi+16] - - movq mm4, [rdi+16] - movq mm5, [rsi+24] - - movq mm6, [rdi+24] - psubw mm5, mm6 - - pmaddwd mm5, mm5 - psubw mm3, mm4 - - pmaddwd mm3, mm3 - paddd mm2, mm5 - - paddd mm2, mm3 - add rsi, 32 - - add rdi, 32 - sub rcx, 1 - - jnz .mberror_loop_mmx - - movq mm0, mm2 - psrlq mm2, 32 - - paddd mm0, mm2 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - ;int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc); global sym(vp8_mbblock_error_sse2_impl) PRIVATE sym(vp8_mbblock_error_sse2_impl): @@ -272,63 +129,6 @@ sym(vp8_mbblock_error_sse2_impl): ret -;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -global sym(vp8_mbuverror_mmx_impl) PRIVATE -sym(vp8_mbuverror_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor mm7, mm7 - -.mbuverror_loop_mmx: - - movq mm1, [rsi] - movq mm2, [rdi] - - psubw mm1, mm2 - pmaddwd mm1, mm1 - - - movq mm3, [rsi+8] - movq mm4, [rdi+8] - - psubw mm3, mm4 - pmaddwd mm3, mm3 - - - paddd mm7, mm1 - paddd mm7, mm3 - - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop_mmx - - movq mm0, mm7 - psrlq mm7, 32 - - paddd mm0, mm7 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - ;int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr); global sym(vp8_mbuverror_sse2_impl) PRIVATE sym(vp8_mbuverror_sse2_impl): diff --git a/vp8/encoder/x86/vp8_enc_stubs_mmx.c b/vp8/encoder/x86/vp8_enc_stubs_mmx.c index d00abd4d0..4406dd0cc 100644 --- a/vp8/encoder/x86/vp8_enc_stubs_mmx.c +++ b/vp8/encoder/x86/vp8_enc_stubs_mmx.c @@ -13,12 +13,6 @@ #include "vpx_ports/x86.h" #include "vp8/encoder/block.h" -void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); -void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch) { - vp8_short_fdct4x4_mmx(input, output, pitch); - vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); -} - int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, short *qcoeff_ptr, short *dequant_ptr, const short *scan_mask, short *round_ptr, @@ -38,17 +32,3 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) { round_ptr, quant_ptr, dqcoeff_ptr); } - -int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc) { - short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return vp8_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); -} - -int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -int vp8_mbuverror_mmx(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp8_mbuverror_mmx_impl(s_ptr, d_ptr); -} |