summaryrefslogtreecommitdiff
path: root/vp8/common/x86/recon_sse2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common/x86/recon_sse2.asm')
-rw-r--r--vp8/common/x86/recon_sse2.asm101
1 files changed, 57 insertions, 44 deletions
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 4b68ef5f2..d371ebd74 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -119,35 +119,39 @@ sym(vp8_copy_mem16x16_sse2):
;void vp8_intra_pred_uv_dc_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dc_mmx2)
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
; from left
- dec rsi
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
- movzx ecx, byte [rsi+rax]
+ movzx ecx, byte [rsi]
+ movzx edx, byte [rsi+rax*1]
+ add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
+
+
movzx edx, byte [rsi+rdi]
- add ecx, edx
lea rsi, [rsi+rax*4]
+ add ecx, edx
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
@@ -156,8 +160,6 @@ sym(vp8_intra_pred_uv_dc_mmx2):
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
- movzx edx, byte [rsi+rax*4]
- add ecx, edx
; add up
pextrw edx, mm1, 0x0
@@ -192,23 +194,24 @@ sym(vp8_intra_pred_uv_dc_mmx2):
;void vp8_intra_pred_uv_dctop_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dctop_mmx2)
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
+ ;arg(3), arg(4) not used
+
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
@@ -245,22 +248,24 @@ sym(vp8_intra_pred_uv_dctop_mmx2):
;void vp8_intra_pred_uv_dcleft_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dcleft_mmx2)
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
+ ;arg(2) not used
+
; from left
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- dec rsi
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
@@ -310,17 +315,20 @@ sym(vp8_intra_pred_uv_dcleft_mmx2):
;void vp8_intra_pred_uv_dc128_mmx(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dc128_mmx)
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
; end prolog
+ ;arg(2), arg(3), arg(4) not used
+
; write out
movq mm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
@@ -346,15 +354,16 @@ sym(vp8_intra_pred_uv_dc128_mmx):
;void vp8_intra_pred_uv_tm_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
%macro vp8_intra_pred_uv_tm 1
global sym(vp8_intra_pred_uv_tm_%1)
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
@@ -362,9 +371,8 @@ sym(vp8_intra_pred_uv_tm_%1):
; read top row
mov edx, 4
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above
+ movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_1024)]
@@ -374,7 +382,7 @@ sym(vp8_intra_pred_uv_tm_%1):
; set up left ptrs and subtract topleft
movd xmm3, [rsi-1]
- lea rsi, [rsi+rax-1]
+ mov rsi, arg(3) ;left;
%ifidn %1, sse2
punpcklbw xmm3, xmm0
pshuflw xmm3, xmm3, 0x0
@@ -427,20 +435,22 @@ vp8_intra_pred_uv_tm ssse3
;void vp8_intra_pred_uv_ve_mmx(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_ve_mmx)
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
; end prolog
+ ; arg(3), arg(4) not used
+
; read from top
mov rax, arg(2) ;src;
- movsxd rdx, dword ptr arg(3) ;src_stride;
- sub rax, rdx
+
movq mm1, [rax]
; write out
@@ -466,15 +476,16 @@ sym(vp8_intra_pred_uv_ve_mmx):
;void vp8_intra_pred_uv_ho_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
%macro vp8_intra_pred_uv_ho 1
global sym(vp8_intra_pred_uv_ho_%1)
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
%ifidn %1, ssse3
@@ -485,12 +496,14 @@ sym(vp8_intra_pred_uv_ho_%1):
%endif
; end prolog
+ ;arg(2) not used
+
; read from left and write out
%ifidn %1, mmx2
mov edx, 4
%endif
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rsi, arg(3) ;left
+ movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
%ifidn %1, ssse3
@@ -498,7 +511,7 @@ sym(vp8_intra_pred_uv_ho_%1):
movdqa xmm2, [GLOBAL(dc_00001111)]
lea rbx, [rax*3]
%endif
- dec rsi
+
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]