summaryrefslogtreecommitdiff
path: root/vp8/common
diff options
context:
space:
mode:
authorJohn Koleszar <jkoleszar@google.com>2011-04-25 09:13:41 -0400
committerJohn Koleszar <jkoleszar@google.com>2011-04-25 09:13:41 -0400
commit308e31a3ef97fa7a5bf9a232b15587955e5ec89f (patch)
tree5cdcdd3b0df930034695d491c722bd57b1a48a3e /vp8/common
parent1cf1ea94704b759aad8bacab0b7fa833ac7e452e (diff)
parent5227798c570af08d08dd6fdd7a3e96d5dc96977b (diff)
downloadlibvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.tar
libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.tar.gz
libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.tar.bz2
libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.zip
Merge remote branch 'internal/upstream-experimental' into HEAD
Conflicts: vp8/decoder/onyxd_int.h Change-Id: Icf445b589c2bc61d93d8c977379bbd84387d0488
Diffstat (limited to 'vp8/common')
-rw-r--r--vp8/common/blockd.h2
-rw-r--r--vp8/common/reconinter.c186
-rw-r--r--vp8/common/reconinter.h3
-rw-r--r--vp8/common/threading.h6
-rw-r--r--vp8/common/x86/idctllm_sse2.asm50
-rw-r--r--vp8/common/x86/iwalsh_sse2.asm10
-rw-r--r--vp8/common/x86/loopfilter_sse2.asm20
-rw-r--r--vp8/common/x86/postproc_sse2.asm6
-rw-r--r--vp8/common/x86/recon_sse2.asm2
-rw-r--r--vp8/common/x86/subpixel_sse2.asm20
-rw-r--r--vp8/common/x86/subpixel_ssse3.asm17
11 files changed, 144 insertions, 178 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 906e05520..3c3592aab 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -175,8 +175,6 @@ typedef struct
unsigned char need_to_clamp_mvs;
unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
-
- unsigned char force_no_skip; /* encoder only */
} MB_MODE_INFO;
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 7cfab4140..6862bae11 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -207,12 +207,12 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
}
+/*encoder only*/
void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
{
int i;
- if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
- x->mode_info_context->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
@@ -257,69 +257,32 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
}
/*encoder only*/
-void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
+void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
{
+ unsigned char *ptr_base;
+ unsigned char *ptr;
+ unsigned char *pred_ptr = x->predictor;
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+ int pre_stride = x->block[0].pre_stride;
- if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
- x->mode_info_context->mbmi.mode != SPLITMV)
- {
- unsigned char *ptr_base;
- unsigned char *ptr;
- unsigned char *pred_ptr = x->predictor;
- int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
- int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
- int pre_stride = x->block[0].pre_stride;
-
- ptr_base = x->pre.y_buffer;
- ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ ptr_base = x->pre.y_buffer;
+ ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
- if ((mv_row | mv_col) & 7)
- {
- x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
- }
+ if ((mv_row | mv_col) & 7)
+ {
+ x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
}
else
{
- int i;
-
- if (x->mode_info_context->mbmi.partitioning < 3)
- {
- for (i = 0; i < 4; i++)
- {
- BLOCKD *d = &x->block[bbb[i]];
- build_inter_predictors4b(x, d, 16);
- }
-
- }
- else
- {
- for (i = 0; i < 16; i += 2)
- {
- BLOCKD *d0 = &x->block[i];
- BLOCKD *d1 = &x->block[i+1];
-
- if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
- build_inter_predictors2b(x, d0, 16);
- else
- {
- vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
- vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
- }
-
- }
- }
+ RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
}
}
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
- if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
- x->mode_info_context->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
@@ -535,58 +498,58 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp
-void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
+void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x)
{
- /*unsigned char *pred_ptr = x->block[0].predictor;
- unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
- unsigned char *pred_ptr = x->predictor;
unsigned char *dst_ptr = x->dst.y_buffer;
- if (x->mode_info_context->mbmi.mode != SPLITMV)
- {
- int offset;
- unsigned char *ptr_base;
- unsigned char *ptr;
- unsigned char *uptr, *vptr;
- /*unsigned char *pred_ptr = x->predictor;
- unsigned char *upred_ptr = &x->predictor[256];
- unsigned char *vpred_ptr = &x->predictor[320];*/
- unsigned char *udst_ptr = x->dst.u_buffer;
- unsigned char *vdst_ptr = x->dst.v_buffer;
+ int offset;
+ unsigned char *ptr_base;
+ unsigned char *ptr;
+ unsigned char *uptr, *vptr;
+ unsigned char *udst_ptr = x->dst.u_buffer;
+ unsigned char *vdst_ptr = x->dst.v_buffer;
- int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
- int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
- int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+ int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
- ptr_base = x->pre.y_buffer;
- ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ ptr_base = x->pre.y_buffer;
+ ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
- if ((mv_row | mv_col) & 7)
- {
- x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
+ if ((mv_row | mv_col) & 7)
+ {
+ x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
+ }
- mv_row = x->block[16].bmi.mv.as_mv.row;
- mv_col = x->block[16].bmi.mv.as_mv.col;
- pre_stride >>= 1;
- offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
- uptr = x->pre.u_buffer + offset;
- vptr = x->pre.v_buffer + offset;
+ mv_row = x->block[16].bmi.mv.as_mv.row;
+ mv_col = x->block[16].bmi.mv.as_mv.col;
+ pre_stride >>= 1;
+ offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ uptr = x->pre.u_buffer + offset;
+ vptr = x->pre.v_buffer + offset;
- if ((mv_row | mv_col) & 7)
- {
- x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
- x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
- }
+ if ((mv_row | mv_col) & 7)
+ {
+ x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
+ x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
+ }
+}
+void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
+{
+ unsigned char *dst_ptr = x->dst.y_buffer;
+
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
+ {
+ vp8_build_inter16x16_predictors_mb_s(x);
}
else
{
@@ -599,25 +562,20 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
{
for (i = 0; i < 4; i++)
{
+ unsigned char *ptr_base;
+ unsigned char *ptr;
BLOCKD *d = &x->block[bbb[i]];
- /*build_inter_predictors4b(x, d, 16);*/
- {
- unsigned char *ptr_base;
- unsigned char *ptr;
- unsigned char *pred_ptr = d->predictor;
+ ptr_base = *(d->base_pre);
+ ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
- ptr_base = *(d->base_pre);
- ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
-
- if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
- {
- x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
+ if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
+ {
+ x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
}
}
@@ -633,7 +591,6 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
/*build_inter_predictors2b(x, d0, 16);*/
unsigned char *ptr_base;
unsigned char *ptr;
- unsigned char *pred_ptr = d0->predictor;
ptr_base = *(d0->base_pre);
ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
@@ -665,7 +622,6 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
/*build_inter_predictors2b(x, d0, 8);*/
unsigned char *ptr_base;
unsigned char *ptr;
- unsigned char *pred_ptr = d0->predictor;
ptr_base = *(d0->base_pre);
ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 7c1dee431..688bebe96 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -14,8 +14,9 @@
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x);
+extern void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x);
-extern void vp8_build_inter_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x);
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index 44eaf0800..b7542b306 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,8 +12,6 @@
#ifndef _PTHREAD_EMULATION
#define _PTHREAD_EMULATION
-#define VPXINFINITE 10000 /* 10second. */
-
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
/* Thread management macros */
@@ -28,7 +26,7 @@
#define pthread_t HANDLE
#define pthread_attr_t DWORD
#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
-#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
+#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
#define thread_sleep(nms) Sleep(nms)
#define pthread_cancel(thread) terminate_thread(thread,0)
@@ -62,7 +60,7 @@
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL)
-#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE))
+#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
#define sem_post(sem) SetEvent(*sem)
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
#define thread_sleep(nms) Sleep(nms)
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index edee1578e..34a7e18ae 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -32,9 +32,6 @@ sym(idct_dequant_0_2x_sse2):
mov rdx, arg(1) ; dequant
mov rax, arg(0) ; qcoeff
- ; Zero out xmm7, for use unpacking
- pxor xmm7, xmm7
-
movd xmm4, [rax]
movd xmm5, [rdx]
@@ -43,9 +40,12 @@ sym(idct_dequant_0_2x_sse2):
pmullw xmm4, xmm5
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
+
; clear coeffs
- movd [rax], xmm7
- movd [rax+32], xmm7
+ movd [rax], xmm5
+ movd [rax+32], xmm5
;pshufb
pshuflw xmm4, xmm4, 00000000b
pshufhw xmm4, xmm4, 00000000b
@@ -62,10 +62,10 @@ sym(idct_dequant_0_2x_sse2):
lea rcx, [3*rcx]
movq xmm3, [rax+rcx]
- punpcklbw xmm0, xmm7
- punpcklbw xmm1, xmm7
- punpcklbw xmm2, xmm7
- punpcklbw xmm3, xmm7
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
mov rax, arg(3) ; dst
movsxd rdx, dword ptr arg(4) ; dst_stride
@@ -77,10 +77,10 @@ sym(idct_dequant_0_2x_sse2):
paddw xmm3, xmm4
; pack up before storing
- packuswb xmm0, xmm7
- packuswb xmm1, xmm7
- packuswb xmm2, xmm7
- packuswb xmm3, xmm7
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
; store blocks back out
movq [rax], xmm0
@@ -102,6 +102,7 @@ sym(idct_dequant_full_2x_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -347,6 +348,7 @@ sym(idct_dequant_full_2x_sse2):
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -377,8 +379,8 @@ sym(idct_dequant_dc_0_2x_sse2):
mov rdi, arg(3) ; dst
mov rdx, arg(5) ; dc
- ; Zero out xmm7, for use unpacking
- pxor xmm7, xmm7
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
; load up 2 dc words here == 2*16 = doubleword
movd xmm4, [rdx]
@@ -398,10 +400,10 @@ sym(idct_dequant_dc_0_2x_sse2):
psraw xmm4, 3
; Predict buffer needs to be expanded from bytes to words
- punpcklbw xmm0, xmm7
- punpcklbw xmm1, xmm7
- punpcklbw xmm2, xmm7
- punpcklbw xmm3, xmm7
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
; Add to predict buffer
paddw xmm0, xmm4
@@ -410,10 +412,10 @@ sym(idct_dequant_dc_0_2x_sse2):
paddw xmm3, xmm4
; pack up before storing
- packuswb xmm0, xmm7
- packuswb xmm1, xmm7
- packuswb xmm2, xmm7
- packuswb xmm3, xmm7
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
; Load destination stride before writing out,
; doesn't need to persist
@@ -441,6 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -692,6 +695,7 @@ sym(idct_dequant_dc_full_2x_sse2):
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 83c97df7d..1da4fd8da 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
- SAVE_XMM
+ SAVE_XMM 6
push rsi
push rdi
; end prolog
@@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
movdqa xmm4, xmm0
punpcklqdq xmm0, xmm3 ;d1 a1
punpckhqdq xmm4, xmm3 ;c1 b1
- movd xmm7, eax
+ movd xmm6, eax
movdqa xmm1, xmm4 ;c1 b1
paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
@@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
movdqa xmm3, xmm4 ;ip[4] ip[0]
- pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03
+ pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
@@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2):
punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- paddw xmm5, xmm7
- paddw xmm1, xmm7
+ paddw xmm5, xmm6
+ paddw xmm1, xmm6
psraw xmm5, 3
psraw xmm1, 3
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 849133dc4..c2ce1a106 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -288,7 +288,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -338,7 +338,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -584,7 +584,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -634,7 +634,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1024,7 +1024,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1091,7 +1091,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1249,7 +1249,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1318,7 +1318,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1386,7 +1386,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1503,7 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 30b4bf53a..06d51ec6f 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 4ad3973ec..67b6420a9 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
- SAVE_XMM
+ SAVE_XMM 7
push rsi
push rdi
; end prolog
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index b87cad259..83e3b1479 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
- ;SAVE_XMM ;xmm6, xmm7 are not used here.
GET_GOT rbx
push rsi
push rdi
@@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
- ;RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 0ec18de76..1ddbc54bd 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -39,6 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -107,6 +108,7 @@ filter_block1d8_h6_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -162,6 +164,7 @@ filter_block1d8_h4_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -179,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -286,6 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -393,6 +397,7 @@ filter_block1d4_h4_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -413,6 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -508,6 +514,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -580,6 +587,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -598,6 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -670,6 +679,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -718,6 +728,7 @@ vp8_filter_block1d8_v4_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -874,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1137,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi