From 56efffdcd1902510ac3da5b5d07b06cfdfc0d002 Mon Sep 17 00:00:00 2001 From: Ralph Giles Date: Tue, 8 Mar 2011 14:51:23 -0800 Subject: Fix an unused variable warning. Move the update of the loopfilter info to the same block where it is used. GCC 4.5 is not able to trace the initialization of the local filter_level across the other calls between the two conditionals on pbi->common and issues an uninitialized variable warning. Change-Id: Ie4487b3714a096b3fb21608f6b0c74e745e3c6fc --- vp8/decoder/threading.c | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) (limited to 'vp8') diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 7fc901054..314a8d7fd 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -296,18 +296,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) } } - if(pbi->common.filter_level) - { - /*update loopfilter info*/ - Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0; - filter_level = pbi->mt_baseline_filter_level[Segment]; - /* Distance of Mb to the various image edges. - * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units - * Apply any context driven MB level adjustment - */ - filter_level = vp8_adjust_mb_lf_value(xd, filter_level); - } - /* Distance of Mb to the various image edges. * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units */ @@ -362,7 +350,16 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) } } - /* loopfilter on this macroblock. */ + /* update loopfilter info */ + Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0; + filter_level = pbi->mt_baseline_filter_level[Segment]; + /* Distance of Mb to the various image edges. 
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + * Apply any context driven MB level adjustment + */ + filter_level = vp8_adjust_mb_lf_value(xd, filter_level); + + /* loopfilter on this macroblock. */ if (filter_level) { if (mb_col > 0) @@ -778,18 +775,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) } } - if(pbi->common.filter_level) - { - /* update loopfilter info */ - Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0; - filter_level = pbi->mt_baseline_filter_level[Segment]; - /* Distance of Mb to the various image edges. - * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units - * Apply any context driven MB level adjustment - */ - filter_level = vp8_adjust_mb_lf_value(xd, filter_level); - } - /* Distance of Mb to the various image edges. * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units */ @@ -853,6 +838,15 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) } } + /* update loopfilter info */ + Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0; + filter_level = pbi->mt_baseline_filter_level[Segment]; + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + * Apply any context driven MB level adjustment + */ + filter_level = vp8_adjust_mb_lf_value(xd, filter_level); + /* loopfilter on this macroblock. 
*/ if (filter_level) { -- cgit v1.2.3 From e54dcfe88d0af4ef73eb1a62f3ecee62428ac2c9 Mon Sep 17 00:00:00 2001 From: Attila Nagy Date: Tue, 8 Mar 2011 14:48:20 +0200 Subject: Add vp8_mse16x16_armv6 function Change-Id: I77e9f2f521a71089228f96e2db72524189364ffb --- vp8/encoder/arm/arm_csystemdependent.c | 4 +- vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm | 133 +++++++++++++++++++++++++++ vp8/encoder/arm/variance_arm.h | 4 + vp8/vp8cx_arm.mk | 1 + 4 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm (limited to 'vp8') diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index 73007d414..afd43042d 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -50,8 +50,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6; cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; - /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ + cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6; + /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; diff --git a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm new file mode 100644 index 000000000..a9060d76f --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm @@ -0,0 +1,133 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_mse16x16_armv6| + + ARM + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +; +;note: Based on vp8_variance16x16_armv6. In this function, sum is never used. +; So, we can remove this part of calculation. + +|vp8_mse16x16_armv6| PROC + + push {r4-r9, lr} + mov r12, #16 ; set loop counter to 16 (=block height) + + mov r4, #0 ; initialize sse = 0 + +loop + ; 1st 4 pixels + ldr r5, [r0, #0x0] ; load 4 src pixels + ldr r6, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + ldr r5, [r0, #0x4] ; load 4 src pixels + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r6, [r2, #0x4] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + ldr r5, [r0, #0x8] ; load 4 src pixels + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to 
halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r6, [r2, #0x8] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + ldr r5, [r0, #0xc] ; load 4 src pixels + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r6, [r2, #0xc] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + subs r12, r12, #1 ; next row + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + bne loop + + ; return stuff + ldr r1, [sp, #28] ; get address of sse + mov r0, r4 ; return sse + str r4, [r1] ; store sse + + pop {r4-r9, pc} + + ENDP + + END diff --git 
a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index 7ac0ac08e..7ad7c76d3 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -20,6 +20,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6); extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6); extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6); +extern prototype_variance(vp8_mse16x16_armv6); #if !CONFIG_RUNTIME_CPU_DETECT @@ -32,6 +33,9 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6); #undef vp8_variance_var16x16 #define vp8_variance_var16x16 vp8_variance16x16_armv6 +#undef vp8_variance_mse16x16 +#define vp8_variance_mse16x16 vp8_mse16x16_armv6 + #undef vp8_variance_halfpixvar16x16_h #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6 diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index 7980a0f75..429898a61 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -36,6 +36,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar # encoder VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM) #File list for neon -- cgit v1.2.3