summary | refs | log | tree | commit | diff
path: root/vp8/common/x86
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common/x86')
-rw-r--r-- vp8/common/x86/idct_x86.h | 4
-rw-r--r-- vp8/common/x86/iwalsh_mmx.asm | 84
-rw-r--r-- vp8/common/x86/iwalsh_sse2.asm | 46
-rw-r--r-- vp8/common/x86/x86_systemdependent.c | 3
4 files changed, 88 insertions, 49 deletions
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index f9e3a794d..06e3ea4b5 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -24,7 +24,6 @@ extern prototype_idct(vp8_short_idct4x4llm_mmx);
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
-extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct16
@@ -36,9 +35,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
-#undef vp8_idct_iwalsh1
-#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_mmx
-
#endif
#endif
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 10b5274dc..3ab066ba4 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -11,42 +11,6 @@
%include "vpx_ports/x86_abi_support.asm"
-;void vp8_short_inv_walsh4x4_1_mmx(short *input, short *output)
-global sym(vp8_short_inv_walsh4x4_1_mmx)
-sym(vp8_short_inv_walsh4x4_1_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 2
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0)
- mov rax, 3
-
- mov rdi, arg(1)
- add rax, [rsi] ;input[0] + 3
-
- movd mm0, eax
-
- punpcklwd mm0, mm0 ;x x val val
-
- punpckldq mm0, mm0 ;val val val val
-
- psraw mm0, 3 ;(input[0] + 3) >> 3
-
- movq [rdi + 0], mm0
- movq [rdi + 8], mm0
- movq [rdi + 16], mm0
- movq [rdi + 24], mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
global sym(vp8_short_inv_walsh4x4_mmx)
sym(vp8_short_inv_walsh4x4_mmx):
@@ -159,10 +123,50 @@ sym(vp8_short_inv_walsh4x4_mmx):
psraw mm2, 3
psraw mm3, 3
- movq [rdi + 0], mm0
- movq [rdi + 8], mm1
- movq [rdi + 16], mm2
- movq [rdi + 24], mm3
+; movq [rdi + 0], mm0
+; movq [rdi + 8], mm1
+; movq [rdi + 16], mm2
+; movq [rdi + 24], mm3
+
+ movd eax, mm0
+ psrlq mm0, 32
+ mov word ptr[rdi+32*0], ax
+ shr eax, 16
+ mov word ptr[rdi+32*1], ax
+ movd eax, mm0
+ mov word ptr[rdi+32*2], ax
+ shr eax, 16
+ mov word ptr[rdi+32*3], ax
+
+ movd ecx, mm1
+ psrlq mm1, 32
+ mov word ptr[rdi+32*4], cx
+ shr ecx, 16
+ mov word ptr[rdi+32*5], cx
+ movd ecx, mm1
+ mov word ptr[rdi+32*6], cx
+ shr ecx, 16
+ mov word ptr[rdi+32*7], cx
+
+ movd eax, mm2
+ psrlq mm2, 32
+ mov word ptr[rdi+32*8], ax
+ shr eax, 16
+ mov word ptr[rdi+32*9], ax
+ movd eax, mm2
+ mov word ptr[rdi+32*10], ax
+ shr eax, 16
+ mov word ptr[rdi+32*11], ax
+
+ movd ecx, mm3
+ psrlq mm3, 32
+ mov word ptr[rdi+32*12], cx
+ shr ecx, 16
+ mov word ptr[rdi+32*13], cx
+ movd ecx, mm3
+ mov word ptr[rdi+32*14], cx
+ shr ecx, 16
+ mov word ptr[rdi+32*15], cx
; begin epilog
pop rdi
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 1da4fd8da..5a7133d6c 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -96,8 +96,50 @@ sym(vp8_short_inv_walsh4x4_sse2):
psraw xmm5, 3
psraw xmm1, 3
- movdqa [rdi + 0], xmm5
- movdqa [rdi + 16], xmm1
+;; movdqa [rdi + 0], xmm5
+;; movdqa [rdi + 16], xmm1
+
+ movd eax, xmm5
+ psrldq xmm5, 4
+ mov word ptr[rdi+32*0], ax
+ shr eax, 16
+ mov word ptr[rdi+32*1], ax
+ movd eax, xmm5
+ psrldq xmm5, 4
+ mov word ptr[rdi+32*2], ax
+ shr eax, 16
+ mov word ptr[rdi+32*3], ax
+
+ movd eax, xmm5
+ psrldq xmm5, 4
+ mov word ptr[rdi+32*4], ax
+ shr eax, 16
+ mov word ptr[rdi+32*5], ax
+ movd eax, xmm5
+ mov word ptr[rdi+32*6], ax
+ shr eax, 16
+ mov word ptr[rdi+32*7], ax
+
+ movd eax, xmm1
+ psrldq xmm1, 4
+ mov word ptr[rdi+32*8], ax
+ shr eax, 16
+ mov word ptr[rdi+32*9], ax
+ movd eax, xmm1
+ psrldq xmm1, 4
+ mov word ptr[rdi+32*10], ax
+ shr eax, 16
+ mov word ptr[rdi+32*11], ax
+
+ movd eax, xmm1
+ psrldq xmm1, 4
+ mov word ptr[rdi+32*12], ax
+ shr eax, 16
+ mov word ptr[rdi+32*13], ax
+ movd eax, xmm1
+ mov word ptr[rdi+32*14], ax
+ shr eax, 16
+ mov word ptr[rdi+32*15], ax
; begin epilog
pop rdi
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index c4e616a67..b24cbe48f 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -40,9 +40,6 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
-
-
rtcd->recon.copy8x8 = vp8_copy_mem8x8_mmx;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_mmx;