diff options
Diffstat (limited to 'vp8/common/x86')
-rw-r--r--  vp8/common/x86/idct_x86.h             |  4
-rw-r--r--  vp8/common/x86/iwalsh_mmx.asm         | 84
-rw-r--r--  vp8/common/x86/iwalsh_sse2.asm        | 46
-rw-r--r--  vp8/common/x86/x86_systemdependent.c  |  3
4 files changed, 88 insertions, 49 deletions
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index f9e3a794d..06e3ea4b5 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -24,7 +24,6 @@ extern prototype_idct(vp8_short_idct4x4llm_mmx);
 extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);

 extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
-extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);

 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef vp8_idct_idct16
@@ -36,9 +35,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
 #undef vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx

-#undef vp8_idct_iwalsh1
-#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_mmx
-
 #endif
 #endif

diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 10b5274dc..3ab066ba4 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -11,42 +11,6 @@

 %include "vpx_ports/x86_abi_support.asm"

-;void vp8_short_inv_walsh4x4_1_mmx(short *input, short *output)
-global sym(vp8_short_inv_walsh4x4_1_mmx)
-sym(vp8_short_inv_walsh4x4_1_mmx):
-    push rbp
-    mov rbp, rsp
-    SHADOW_ARGS_TO_STACK 2
-    push rsi
-    push rdi
-    ; end prolog
-
-    mov rsi, arg(0)
-    mov rax, 3
-
-    mov rdi, arg(1)
-    add rax, [rsi] ;input[0] + 3
-
-    movd mm0, eax
-
-    punpcklwd mm0, mm0 ;x x val val
-
-    punpckldq mm0, mm0 ;val val val val
-
-    psraw mm0, 3 ;(input[0] + 3) >> 3
-
-    movq [rdi + 0], mm0
-    movq [rdi + 8], mm0
-    movq [rdi + 16], mm0
-    movq [rdi + 24], mm0
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    UNSHADOW_ARGS
-    pop rbp
-    ret
-
 ;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
 global sym(vp8_short_inv_walsh4x4_mmx)
 sym(vp8_short_inv_walsh4x4_mmx):
@@ -159,10 +123,50 @@ sym(vp8_short_inv_walsh4x4_mmx):
     psraw mm2, 3
     psraw mm3, 3

-    movq [rdi + 0], mm0
-    movq [rdi + 8], mm1
-    movq [rdi + 16], mm2
-    movq [rdi + 24], mm3
+; movq [rdi + 0], mm0
+; movq [rdi + 8], mm1
+; movq [rdi + 16], mm2
+; movq [rdi + 24], mm3
+
+    movd eax, mm0
+    psrlq mm0, 32
+    mov word ptr[rdi+32*0], ax
+    shr eax, 16
+    mov word ptr[rdi+32*1], ax
+    movd eax, mm0
+    mov word ptr[rdi+32*2], ax
+    shr eax, 16
+    mov word ptr[rdi+32*3], ax
+
+    movd ecx, mm1
+    psrlq mm1, 32
+    mov word ptr[rdi+32*4], cx
+    shr ecx, 16
+    mov word ptr[rdi+32*5], cx
+    movd ecx, mm1
+    mov word ptr[rdi+32*6], cx
+    shr ecx, 16
+    mov word ptr[rdi+32*7], cx
+
+    movd eax, mm2
+    psrlq mm2, 32
+    mov word ptr[rdi+32*8], ax
+    shr eax, 16
+    mov word ptr[rdi+32*9], ax
+    movd eax, mm2
+    mov word ptr[rdi+32*10], ax
+    shr eax, 16
+    mov word ptr[rdi+32*11], ax
+
+    movd ecx, mm3
+    psrlq mm3, 32
+    mov word ptr[rdi+32*12], cx
+    shr ecx, 16
+    mov word ptr[rdi+32*13], cx
+    movd ecx, mm3
+    mov word ptr[rdi+32*14], cx
+    shr ecx, 16
+    mov word ptr[rdi+32*15], cx

     ; begin epilog
     pop rdi
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 1da4fd8da..5a7133d6c 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -96,8 +96,50 @@ sym(vp8_short_inv_walsh4x4_sse2):
     psraw xmm5, 3
     psraw xmm1, 3

-    movdqa [rdi + 0], xmm5
-    movdqa [rdi + 16], xmm1
+;; movdqa [rdi + 0], xmm5
+;; movdqa [rdi + 16], xmm1
+
+    movd eax, xmm5
+    psrldq xmm5, 4
+    mov word ptr[rdi+32*0], ax
+    shr eax, 16
+    mov word ptr[rdi+32*1], ax
+    movd eax, xmm5
+    psrldq xmm5, 4
+    mov word ptr[rdi+32*2], ax
+    shr eax, 16
+    mov word ptr[rdi+32*3], ax
+
+    movd eax, xmm5
+    psrldq xmm5, 4
+    mov word ptr[rdi+32*4], ax
+    shr eax, 16
+    mov word ptr[rdi+32*5], ax
+    movd eax, xmm5
+    mov word ptr[rdi+32*6], ax
+    shr eax, 16
+    mov word ptr[rdi+32*7], ax
+
+    movd eax, xmm1
+    psrldq xmm1, 4
+    mov word ptr[rdi+32*8], ax
+    shr eax, 16
+    mov word ptr[rdi+32*9], ax
+    movd eax, xmm1
+    psrldq xmm1, 4
+    mov word ptr[rdi+32*10], ax
+    shr eax, 16
+    mov word ptr[rdi+32*11], ax
+
+    movd eax, xmm1
+    psrldq xmm1, 4
+    mov word ptr[rdi+32*12], ax
+    shr eax, 16
+    mov word ptr[rdi+32*13], ax
+    movd eax, xmm1
+    mov word ptr[rdi+32*14], ax
+    shr eax, 16
+    mov word ptr[rdi+32*15], ax

     ; begin epilog
     pop rdi
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index c4e616a67..b24cbe48f 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -40,9 +40,6 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
         rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
         rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
         rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
-        rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
-
-

         rtcd->recon.copy8x8 = vp8_copy_mem8x8_mmx;
         rtcd->recon.copy8x4 = vp8_copy_mem8x4_mmx;