From 8ba58951e9db28dccaf9adbd32eb24de28fa5e67 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Fri, 15 Apr 2011 12:57:15 -0400 Subject: Handle long delay between video frames in multi-thread decoder(issue 312) This was reported by m...@hesotech.de (see issue 312): "The decoder causes an access violation when you decode the first frame, then make a pause of about 60 seconds and then decode further frames. But only if vpx_codec_dec_cfg_t.threads> 1. This is caused by a timeout of WaitForSingleObject. When I change the definition of VPXINFINITE to INFINITE(0xFFFFFFFF), the problem is solved." Reproduced the crash and verified the changes on the Windows platform. This brings the behavior in line with the other platforms using sem_wait(). Change-Id: I27b32f90bce05846ef2684b50f7a88f292299da1 --- vp8/common/threading.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'vp8/common') diff --git a/vp8/common/threading.h b/vp8/common/threading.h index 44eaf0800..b7542b306 100644 --- a/vp8/common/threading.h +++ b/vp8/common/threading.h @@ -12,8 +12,6 @@ #ifndef _PTHREAD_EMULATION #define _PTHREAD_EMULATION -#define VPXINFINITE 10000 /* 10second. 
*/ - #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD /* Thread management macros */ @@ -28,7 +26,7 @@ #define pthread_t HANDLE #define pthread_attr_t DWORD #define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL) -#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread)) +#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread)) #define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) #define thread_sleep(nms) Sleep(nms) #define pthread_cancel(thread) terminate_thread(thread,0) @@ -62,7 +60,7 @@ #define sem_t HANDLE #define pause(voidpara) __asm PAUSE #define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL) -#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE)) +#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE)) #define sem_post(sem) SetEvent(*sem) #define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE) #define thread_sleep(nms) Sleep(nms) -- cgit v1.2.3 From c7cfde42a9ec05b72d15ebaa9a59cefed4cd323a Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 7 Apr 2011 13:17:22 -0400 Subject: Add save/restore xmm registers in x86 assembly code Went through the code and fixed it. Verified on Windows. Where possible, remove dependencies on xmm[67] Current code relies on pushing rbp to the stack to get 16 byte alignment. This broke when rbp wasn't pushed (vp8/encoder/x86/sad_sse3.asm). Work around this by using unaligned memory accesses. Revisit this and the offsets in vp8/encoder/x86/sad_sse3.asm in another change to SAVE_XMM. 
Change-Id: I5f940994d3ebfd977c3d68446cef20fd78b07877 --- vp8/common/x86/idctllm_sse2.asm | 50 +++++++++++++++++++++------------------ vp8/common/x86/subpixel_ssse3.asm | 12 ++++++++++ 2 files changed, 39 insertions(+), 23 deletions(-) (limited to 'vp8/common') diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm index edee1578e..c873869ab 100644 --- a/vp8/common/x86/idctllm_sse2.asm +++ b/vp8/common/x86/idctllm_sse2.asm @@ -32,9 +32,6 @@ sym(idct_dequant_0_2x_sse2): mov rdx, arg(1) ; dequant mov rax, arg(0) ; qcoeff - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - movd xmm4, [rax] movd xmm5, [rdx] @@ -43,9 +40,12 @@ sym(idct_dequant_0_2x_sse2): pmullw xmm4, xmm5 + ; Zero out xmm5, for use unpacking + pxor xmm5, xmm5 + ; clear coeffs - movd [rax], xmm7 - movd [rax+32], xmm7 + movd [rax], xmm5 + movd [rax+32], xmm5 ;pshufb pshuflw xmm4, xmm4, 00000000b pshufhw xmm4, xmm4, 00000000b @@ -62,10 +62,10 @@ sym(idct_dequant_0_2x_sse2): lea rcx, [3*rcx] movq xmm3, [rax+rcx] - punpcklbw xmm0, xmm7 - punpcklbw xmm1, xmm7 - punpcklbw xmm2, xmm7 - punpcklbw xmm3, xmm7 + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpcklbw xmm2, xmm5 + punpcklbw xmm3, xmm5 mov rax, arg(3) ; dst movsxd rdx, dword ptr arg(4) ; dst_stride @@ -77,10 +77,10 @@ sym(idct_dequant_0_2x_sse2): paddw xmm3, xmm4 ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 + packuswb xmm0, xmm5 + packuswb xmm1, xmm5 + packuswb xmm2, xmm5 + packuswb xmm3, xmm5 ; store blocks back out movq [rax], xmm0 @@ -102,6 +102,7 @@ sym(idct_dequant_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -347,6 +348,7 @@ sym(idct_dequant_full_2x_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -377,8 +379,8 @@ sym(idct_dequant_dc_0_2x_sse2): mov rdi, arg(3) ; dst mov rdx, arg(5) ; dc - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 + ; Zero out xmm5, 
for use unpacking + pxor xmm5, xmm5 ; load up 2 dc words here == 2*16 = doubleword movd xmm4, [rdx] @@ -398,10 +400,10 @@ sym(idct_dequant_dc_0_2x_sse2): psraw xmm4, 3 ; Predict buffer needs to be expanded from bytes to words - punpcklbw xmm0, xmm7 - punpcklbw xmm1, xmm7 - punpcklbw xmm2, xmm7 - punpcklbw xmm3, xmm7 + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpcklbw xmm2, xmm5 + punpcklbw xmm3, xmm5 ; Add to predict buffer paddw xmm0, xmm4 @@ -410,10 +412,10 @@ sym(idct_dequant_dc_0_2x_sse2): paddw xmm3, xmm4 ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 + packuswb xmm0, xmm5 + packuswb xmm1, xmm5 + packuswb xmm2, xmm5 + packuswb xmm3, xmm5 ; Load destination stride before writing out, ; doesn't need to persist @@ -441,6 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -692,6 +695,7 @@ sym(idct_dequant_dc_full_2x_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index 0ec18de76..1db3d629c 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -39,6 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -107,6 +108,7 @@ filter_block1d8_h6_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -162,6 +164,7 @@ filter_block1d8_h4_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -286,6 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -393,6 +397,7 @@ filter_block1d4_h4_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -413,6 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3): push rbp mov rbp, rsp 
SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -508,6 +514,7 @@ vp8_filter_block1d16_v6_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -580,6 +587,7 @@ vp8_filter_block1d16_v4_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -598,6 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -670,6 +679,7 @@ vp8_filter_block1d8_v6_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -718,6 +728,7 @@ vp8_filter_block1d8_v4_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -808,6 +819,7 @@ vp8_filter_block1d4_v6_ssse3_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret -- cgit v1.2.3 From 4a2b684ef4b361b805be8e0db972cbe9b7e24752 Mon Sep 17 00:00:00 2001 From: Johann Date: Fri, 15 Apr 2011 10:05:20 -0400 Subject: modify SAVE_XMM for potential 64bit use the win64 abi requires saving and restoring xmm6:xmm15. currently SAVE_XMM and RESTORE_XMM only allow for saving xmm6:xmm7. allow specifying the highest register used and if the stack is unaligned. 
Change-Id: Ica5699622ffe3346d3a486f48eef0206c51cf867 --- vp8/common/x86/idctllm_sse2.asm | 4 ++-- vp8/common/x86/iwalsh_sse2.asm | 10 +++++----- vp8/common/x86/loopfilter_sse2.asm | 20 ++++++++++---------- vp8/common/x86/postproc_sse2.asm | 6 +++--- vp8/common/x86/recon_sse2.asm | 2 +- vp8/common/x86/subpixel_sse2.asm | 20 +++++++++----------- vp8/common/x86/subpixel_ssse3.asm | 15 +++++++-------- 7 files changed, 37 insertions(+), 40 deletions(-) (limited to 'vp8/common') diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm index c873869ab..34a7e18ae 100644 --- a/vp8/common/x86/idctllm_sse2.asm +++ b/vp8/common/x86/idctllm_sse2.asm @@ -102,7 +102,7 @@ sym(idct_dequant_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -443,7 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm index 83c97df7d..1da4fd8da 100644 --- a/vp8/common/x86/iwalsh_sse2.asm +++ b/vp8/common/x86/iwalsh_sse2.asm @@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 - SAVE_XMM + SAVE_XMM 6 push rsi push rdi ; end prolog @@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2): movdqa xmm4, xmm0 punpcklqdq xmm0, xmm3 ;d1 a1 punpckhqdq xmm4, xmm3 ;c1 b1 - movd xmm7, eax + movd xmm6, eax movdqa xmm1, xmm4 ;c1 b1 paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] @@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2): pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] movdqa xmm3, xmm4 ;ip[4] ip[0] - pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03 + pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03 paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 @@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2): punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00 punpckhwd xmm1, xmm0 ; 33 23 13 03 
32 22 12 02 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - paddw xmm5, xmm7 - paddw xmm1, xmm7 + paddw xmm5, xmm6 + paddw xmm1, xmm6 psraw xmm5, 3 psraw xmm1, 3 diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 849133dc4..c2ce1a106 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -288,7 +288,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -338,7 +338,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -584,7 +584,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -634,7 +634,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1024,7 +1024,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1091,7 +1091,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1249,7 +1249,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1318,7 +1318,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1386,7 +1386,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1503,7 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. 
SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx ; save callee-saved reg push rsi push rdi diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index 30b4bf53a..06d51ec6f 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index 4ad3973ec..67b6420a9 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 - SAVE_XMM + SAVE_XMM 7 push rsi push rdi ; end prolog diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index b87cad259..83e3b1479 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + 
SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 - ;SAVE_XMM ;xmm6, xmm7 are not used here. GET_GOT rbx push rsi push rdi @@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop: pop rdi pop rsi RESTORE_GOT - ;RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index 1db3d629c..1ddbc54bd 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -39,7 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -182,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -289,7 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -418,7 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -606,7 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -819,7 +819,6 @@ vp8_filter_block1d4_v6_ssse3_loop: pop rdi pop 
rsi RESTORE_GOT - RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -886,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi @@ -1149,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - SAVE_XMM + SAVE_XMM 7 GET_GOT rbx push rsi push rdi -- cgit v1.2.3 From 09c933ea804c37c4aa95c15a67769dcf1a27b5a1 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Tue, 19 Apr 2011 11:42:15 -0400 Subject: Removed redundant checks of the mode_info_context flags Code cleanup. The build inter predictor functions are redundantly checking the mode_info_context for either INTRA_FRAME or SPLITMV. Change-Id: I4d58c3a5192a4c2cec5c24ab1caf608bf13aebfb --- vp8/common/reconinter.c | 186 ++++++++++++++++++------------------------------ vp8/common/reconinter.h | 3 +- 2 files changed, 73 insertions(+), 116 deletions(-) (limited to 'vp8/common') diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 7cfab4140..6862bae11 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -207,12 +207,12 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch) } +/*encoder only*/ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x) { int i; - if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && - x->mode_info_context->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = &x->predictor[256]; @@ -257,69 +257,32 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x) } /*encoder only*/ -void vp8_build_inter_predictors_mby(MACROBLOCKD *x) +void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x) { + unsigned char *ptr_base; + unsigned char *ptr; + unsigned char *pred_ptr = x->predictor; + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int pre_stride = x->block[0].pre_stride; - if 
(x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && - x->mode_info_context->mbmi.mode != SPLITMV) - { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = x->predictor; - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int pre_stride = x->block[0].pre_stride; - - ptr_base = x->pre.y_buffer; - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); + ptr_base = x->pre.y_buffer; + ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16); - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16); - } + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16); } else { - int i; - - if (x->mode_info_context->mbmi.partitioning < 3) - { - for (i = 0; i < 4; i++) - { - BLOCKD *d = &x->block[bbb[i]]; - build_inter_predictors4b(x, d, 16); - } - - } - else - { - for (i = 0; i < 16; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, 16); - else - { - vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict); - vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict); - } - - } - } + RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16); } } void vp8_build_inter_predictors_mb(MACROBLOCKD *x) { - if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && - x->mode_info_context->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { int offset; unsigned char *ptr_base; @@ -535,58 +498,58 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp -void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) +void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x) { - /*unsigned char *pred_ptr = x->block[0].predictor; - 
unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/ - unsigned char *pred_ptr = x->predictor; unsigned char *dst_ptr = x->dst.y_buffer; - if (x->mode_info_context->mbmi.mode != SPLITMV) - { - int offset; - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *uptr, *vptr; - /*unsigned char *pred_ptr = x->predictor; - unsigned char *upred_ptr = &x->predictor[256]; - unsigned char *vpred_ptr = &x->predictor[320];*/ - unsigned char *udst_ptr = x->dst.u_buffer; - unsigned char *vdst_ptr = x->dst.v_buffer; + int offset; + unsigned char *ptr_base; + unsigned char *ptr; + unsigned char *uptr, *vptr; + unsigned char *udst_ptr = x->dst.u_buffer; + unsigned char *vdst_ptr = x->dst.v_buffer; - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/ + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/ - ptr_base = x->pre.y_buffer; - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); + ptr_base = x->pre.y_buffer; + ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ + } - mv_row = x->block[16].bmi.mv.as_mv.row; - mv_col = x->block[16].bmi.mv.as_mv.col; - pre_stride >>= 1; - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); 
- uptr = x->pre.u_buffer + offset; - vptr = x->pre.v_buffer + offset; + mv_row = x->block[16].bmi.mv.as_mv.row; + mv_col = x->block[16].bmi.mv.as_mv.col; + pre_stride >>= 1; + offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); + uptr = x->pre.u_buffer + offset; + vptr = x->pre.v_buffer + offset; - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride); - x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride); - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride); - RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride); - } + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride); + x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride); + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride); + RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride); + } +} +void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) +{ + unsigned char *dst_ptr = x->dst.y_buffer; + + if (x->mode_info_context->mbmi.mode != SPLITMV) + { + vp8_build_inter16x16_predictors_mb_s(x); } else { @@ -599,25 +562,20 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) { for (i = 0; i < 4; i++) { + unsigned char *ptr_base; + unsigned char *ptr; BLOCKD *d = &x->block[bbb[i]]; - /*build_inter_predictors4b(x, d, 16);*/ - { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + ptr_base = *(d->base_pre); + ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3); - ptr_base = *(d->base_pre); - ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - 
x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } - else - { - RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ - } + if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) + { + x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ } } } @@ -633,7 +591,6 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) /*build_inter_predictors2b(x, d0, 16);*/ unsigned char *ptr_base; unsigned char *ptr; - unsigned char *pred_ptr = d0->predictor; ptr_base = *(d0->base_pre); ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3); @@ -665,7 +622,6 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) /*build_inter_predictors2b(x, d0, 8);*/ unsigned char *ptr_base; unsigned char *ptr; - unsigned char *pred_ptr = d0->predictor; ptr_base = *(d0->base_pre); ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3); diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index 7c1dee431..688bebe96 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -14,8 +14,9 @@ extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x); extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x); +extern void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x); -extern void vp8_build_inter_predictors_mby(MACROBLOCKD *x); +extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x); extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel); extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf); extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x); -- 
cgit v1.2.3 From 7a49accd0b65453057762929efc7eed93deba043 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Wed, 20 Apr 2011 15:45:12 -0400 Subject: Removed force_no_skip force_no_skip is always set to zero. Change-Id: I89b61c5e0bee34627a9c07c05f3517e1db76af77 --- vp8/common/blockd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'vp8/common') diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index fc8e0722c..cfe9e1572 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -173,8 +173,6 @@ typedef struct unsigned char need_to_clamp_mvs; unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */ - - unsigned char force_no_skip; /* encoder only */ } MB_MODE_INFO; -- cgit v1.2.3