diff options
author | John Koleszar <jkoleszar@google.com> | 2011-04-25 09:13:41 -0400 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2011-04-25 09:13:41 -0400 |
commit | 308e31a3ef97fa7a5bf9a232b15587955e5ec89f (patch) | |
tree | 5cdcdd3b0df930034695d491c722bd57b1a48a3e /vp8/common | |
parent | 1cf1ea94704b759aad8bacab0b7fa833ac7e452e (diff) | |
parent | 5227798c570af08d08dd6fdd7a3e96d5dc96977b (diff) | |
download | libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.tar libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.tar.gz libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.tar.bz2 libvpx-308e31a3ef97fa7a5bf9a232b15587955e5ec89f.zip |
Merge remote branch 'internal/upstream-experimental' into HEAD
Conflicts:
vp8/decoder/onyxd_int.h
Change-Id: Icf445b589c2bc61d93d8c977379bbd84387d0488
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/blockd.h | 2 | ||||
-rw-r--r-- | vp8/common/reconinter.c | 186 | ||||
-rw-r--r-- | vp8/common/reconinter.h | 3 | ||||
-rw-r--r-- | vp8/common/threading.h | 6 | ||||
-rw-r--r-- | vp8/common/x86/idctllm_sse2.asm | 50 | ||||
-rw-r--r-- | vp8/common/x86/iwalsh_sse2.asm | 10 | ||||
-rw-r--r-- | vp8/common/x86/loopfilter_sse2.asm | 20 | ||||
-rw-r--r-- | vp8/common/x86/postproc_sse2.asm | 6 | ||||
-rw-r--r-- | vp8/common/x86/recon_sse2.asm | 2 | ||||
-rw-r--r-- | vp8/common/x86/subpixel_sse2.asm | 20 | ||||
-rw-r--r-- | vp8/common/x86/subpixel_ssse3.asm | 17 |
11 files changed, 144 insertions, 178 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 906e05520..3c3592aab 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -175,8 +175,6 @@ typedef struct unsigned char need_to_clamp_mvs; unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */ - - unsigned char force_no_skip; /* encoder only */ } MB_MODE_INFO; diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 7cfab4140..6862bae11 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -207,12 +207,12 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch) } +/*encoder only*/ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x) { int i; - if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && - x->mode_info_context->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = &x->predictor[256]; @@ -257,69 +257,32 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x) } /*encoder only*/ -void vp8_build_inter_predictors_mby(MACROBLOCKD *x) +void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x) { + unsigned char *ptr_base; + unsigned char *ptr; + unsigned char *pred_ptr = x->predictor; + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int pre_stride = x->block[0].pre_stride; - if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && - x->mode_info_context->mbmi.mode != SPLITMV) - { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = x->predictor; - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int pre_stride = x->block[0].pre_stride; - - ptr_base = x->pre.y_buffer; - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); + ptr_base = x->pre.y_buffer; + ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16); - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16); - } + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16); } else { - int i; - - if (x->mode_info_context->mbmi.partitioning < 3) - { - for (i = 0; i < 4; i++) - { - BLOCKD *d = &x->block[bbb[i]]; - build_inter_predictors4b(x, d, 16); - } - - } - else - { - for (i = 0; i < 16; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, 16); - else - { - vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict); - vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict); - } - - } - } + RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16); } } void vp8_build_inter_predictors_mb(MACROBLOCKD *x) { - if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && - x->mode_info_context->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { int offset; unsigned char *ptr_base; @@ -535,58 +498,58 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp -void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) +void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x) { - /*unsigned char *pred_ptr = x->block[0].predictor; - unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/ - unsigned char *pred_ptr = x->predictor; unsigned char *dst_ptr = x->dst.y_buffer; - if (x->mode_info_context->mbmi.mode != SPLITMV) - { - int offset; - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *uptr, *vptr; - /*unsigned char *pred_ptr = x->predictor; - unsigned char *upred_ptr = &x->predictor[256]; - unsigned char *vpred_ptr = &x->predictor[320];*/ - unsigned char *udst_ptr = x->dst.u_buffer; - unsigned char *vdst_ptr = x->dst.v_buffer; + int offset; + unsigned char *ptr_base; + unsigned char *ptr; + unsigned char *uptr, *vptr; + unsigned char *udst_ptr = x->dst.u_buffer; + unsigned char *vdst_ptr = x->dst.v_buffer; - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/ + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/ - ptr_base = x->pre.y_buffer; - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); + ptr_base = x->pre.y_buffer; + ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ + } - mv_row = x->block[16].bmi.mv.as_mv.row; - mv_col = x->block[16].bmi.mv.as_mv.col; - pre_stride >>= 1; - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = x->pre.u_buffer + offset; - vptr = x->pre.v_buffer + offset; + mv_row = x->block[16].bmi.mv.as_mv.row; + mv_col = x->block[16].bmi.mv.as_mv.col; + pre_stride >>= 1; + offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); + uptr = x->pre.u_buffer + offset; + vptr = x->pre.v_buffer + offset; - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride); - x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride); - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride); - RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride); - } + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride); + x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride); + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride); + RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride); + } +} +void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) +{ + unsigned char *dst_ptr = x->dst.y_buffer; + + if (x->mode_info_context->mbmi.mode != SPLITMV) + { + vp8_build_inter16x16_predictors_mb_s(x); } else { @@ -599,25 +562,20 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) { for (i = 0; i < 4; i++) { + unsigned char *ptr_base; + unsigned char *ptr; BLOCKD *d = &x->block[bbb[i]]; - /*build_inter_predictors4b(x, d, 16);*/ - { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + ptr_base = *(d->base_pre); + ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3); - ptr_base = *(d->base_pre); - ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } + if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) + { + x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ } } } @@ -633,7 +591,6 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) /*build_inter_predictors2b(x, d0, 16);*/ unsigned char *ptr_base; unsigned char *ptr; - unsigned char *pred_ptr = d0->predictor; ptr_base = *(d0->base_pre); ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3); @@ -665,7 +622,6 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) /*build_inter_predictors2b(x, d0, 8);*/ unsigned char *ptr_base; unsigned char *ptr; - unsigned char *pred_ptr = d0->predictor; ptr_base = *(d0->base_pre); ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3); diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index 7c1dee431..688bebe96 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -14,8 +14,9 @@ extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x); extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x); +extern void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x); -extern void vp8_build_inter_predictors_mby(MACROBLOCKD *x); +extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x); extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel); extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf); extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x); diff --git a/vp8/common/threading.h b/vp8/common/threading.h index 44eaf0800..b7542b306 100644 --- a/vp8/common/threading.h +++ b/vp8/common/threading.h @@ -12,8 +12,6 @@ #ifndef _PTHREAD_EMULATION #define _PTHREAD_EMULATION -#define VPXINFINITE 10000 /* 10second. */ - #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD /* Thread management macros */ @@ -28,7 +26,7 @@ #define pthread_t HANDLE #define pthread_attr_t DWORD #define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL) -#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread)) +#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread)) #define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) #define thread_sleep(nms) Sleep(nms) #define pthread_cancel(thread) terminate_thread(thread,0) @@ -62,7 +60,7 @@ #define sem_t HANDLE #define pause(voidpara) __asm PAUSE #define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL) -#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE)) +#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE)) #define sem_post(sem) SetEvent(*sem) #define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE) #define thread_sleep(nms) Sleep(nms) diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm index edee1578e..34a7e18ae 100644 --- a/vp8/common/x86/idctllm_sse2.asm +++ b/vp8/common/x86/idctllm_sse2.asm @@ -32,9 +32,6 @@ sym(idct_dequant_0_2x_sse2): mov rdx, arg(1) ; dequant mov rax, arg(0) ; qcoeff - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - movd xmm4, [rax] movd xmm5, [rdx] @@ -43,9 +40,12 @@ sym(idct_dequant_0_2x_sse2): pmullw xmm4, xmm5 + ; Zero out xmm5, for use unpacking + pxor xmm5, xmm5 + ; clear coeffs - movd [rax], xmm7 - movd [rax+32], xmm7 + movd [rax], xmm5 + movd [rax+32], xmm5 ;pshufb pshuflw xmm4, xmm4, 00000000b pshufhw xmm4, xmm4, 00000000b @@ -62,10 +62,10 @@ sym(idct_dequant_0_2x_sse2): lea rcx, [3*rcx] movq xmm3, [rax+rcx] - punpcklbw xmm0, xmm7 - punpcklbw xmm1, xmm7 - punpcklbw xmm2, xmm7 - punpcklbw xmm3, xmm7 + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpcklbw xmm2, xmm5 + punpcklbw xmm3, xmm5 mov rax, arg(3) ; dst movsxd rdx, dword ptr arg(4) ; dst_stride @@ -77,10 +77,10 @@ sym(idct_dequant_0_2x_sse2): paddw xmm3, xmm4 ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 + packuswb xmm0, xmm5 + packuswb xmm1, xmm5 + packuswb xmm2, xmm5 + packuswb xmm3, xmm5 ; store blocks back out movq [rax], xmm0 @@ -102,6 +102,7 @@ sym(idct_dequant_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -347,6 +348,7 @@ sym(idct_dequant_full_2x_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -377,8 +379,8 @@ sym(idct_dequant_dc_0_2x_sse2): mov rdi, arg(3) ; dst mov rdx, arg(5) ; dc - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 + ; Zero out xmm5, for use unpacking + pxor xmm5, xmm5 ; load up 2 dc words here == 2*16 = doubleword movd xmm4, [rdx] @@ -398,10 +400,10 @@ sym(idct_dequant_dc_0_2x_sse2): psraw xmm4, 3 ; Predict buffer needs to be expanded from bytes to words - punpcklbw xmm0, xmm7 - punpcklbw xmm1, xmm7 - punpcklbw xmm2, xmm7 - punpcklbw xmm3, xmm7 + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpcklbw xmm2, xmm5 + punpcklbw xmm3, xmm5 ; Add to predict buffer paddw xmm0, xmm4 @@ -410,10 +412,10 @@ sym(idct_dequant_dc_0_2x_sse2): paddw xmm3, xmm4 ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 + packuswb xmm0, xmm5 + packuswb xmm1, xmm5 + packuswb xmm2, xmm5 + packuswb xmm3, xmm5 ; Load destination stride before writing out, ; doesn't need to persist @@ -441,6 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -692,6 +695,7 @@ sym(idct_dequant_dc_full_2x_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm index 83c97df7d..1da4fd8da 100644 --- a/vp8/common/x86/iwalsh_sse2.asm +++ b/vp8/common/x86/iwalsh_sse2.asm @@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 - SAVE_XMM + SAVE_XMM 6 push rsi push rdi ; end prolog @@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2): movdqa xmm4, xmm0 punpcklqdq xmm0, xmm3 ;d1 a1 punpckhqdq xmm4, xmm3 ;c1 b1 - movd xmm7, eax + movd xmm6, eax movdqa xmm1, xmm4 ;c1 b1 paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] @@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2): pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] movdqa xmm3, xmm4 ;ip[4] ip[0] - pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03 + pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03 paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 @@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2): punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00 punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - paddw xmm5, xmm7 - paddw xmm1, xmm7 + paddw xmm5, xmm6 + paddw xmm1, xmm6 psraw xmm5, 3 psraw xmm1, 3 diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 849133dc4..c2ce1a106 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -288,7 +288,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -338,7 +338,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -584,7 +584,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -634,7 +634,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1024,7 +1024,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1091,7 +1091,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1249,7 +1249,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1318,7 +1318,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1386,7 +1386,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1503,7 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx ; save callee-saved reg push rsi push rdi diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index 30b4bf53a..06d51ec6f 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index 4ad3973ec..67b6420a9 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 - SAVE_XMM + SAVE_XMM 7 push rsi push rdi ; end prolog diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index b87cad259..83e3b1479 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 - ;SAVE_XMM ;xmm6, xmm7 are not used here. GET_GOT rbx push rsi push rdi @@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop: pop rdi pop rsi RESTORE_GOT - ;RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index 0ec18de76..1ddbc54bd 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -39,6 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -107,6 +108,7 @@ filter_block1d8_h6_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -162,6 +164,7 @@ filter_block1d8_h4_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -179,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -286,6 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -393,6 +397,7 @@ filter_block1d4_h4_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -413,6 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -508,6 +514,7 @@ vp8_filter_block1d16_v6_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -580,6 +587,7 @@ vp8_filter_block1d16_v4_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -598,6 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -670,6 +679,7 @@ vp8_filter_block1d8_v6_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -718,6 +728,7 @@ vp8_filter_block1d8_v4_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -874,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1137,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi |